@@ -1257,6 +1257,51 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
                                     StoreEv, LoadEv, BECount);
 }
 
+class MemmoveVerifier {
+public:
+  explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr,
+                           const DataLayout &DL)
+      : DL(DL), LoadOff(0), StoreOff(0),
+        BP1(llvm::GetPointerBaseWithConstantOffset(
+            LoadBasePtr.stripPointerCasts(), LoadOff, DL)),
+        BP2(llvm::GetPointerBaseWithConstantOffset(
+            StoreBasePtr.stripPointerCasts(), StoreOff, DL)),
+        IsSameObject(BP1 == BP2) {}
+
+  bool loadAndStoreMayFormMemmove(unsigned StoreSize, bool IsNegStride,
+                                  const Instruction &TheLoad,
+                                  bool IsMemCpy) const {
+    if (IsMemCpy) {
+      // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
+      // for negative stride.
+      if ((!IsNegStride && LoadOff <= StoreOff) ||
+          (IsNegStride && LoadOff >= StoreOff))
+        return false;
+    } else {
+      // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
+      // for negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
+      int64_t LoadSize =
+          DL.getTypeSizeInBits(TheLoad.getType()).getFixedSize() / 8;
+      if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
+        return false;
+      if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
+          (IsNegStride && LoadOff + LoadSize > StoreOff))
+        return false;
+    }
+    return true;
+  }
+
+private:
+  const DataLayout &DL;
+  int64_t LoadOff;
+  int64_t StoreOff;
+  const Value *BP1;
+  const Value *BP2;
+
+public:
+  const bool IsSameObject;
+};
+
 bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
     Value *DestPtr, Value *SourcePtr, const SCEV *StoreSizeSCEV,
     MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore,
@@ -1321,10 +1366,10 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
   bool IsMemCpy = isa<MemCpyInst>(TheStore);
   const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";
 
-  bool UseMemMove =
+  bool LoopAccessStore =
       mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
                             StoreSizeSCEV, *AA, IgnoredInsts);
-  if (UseMemMove) {
+  if (LoopAccessStore) {
     // For memmove case it's not enough to guarantee that loop doesn't access
     // TheStore and TheLoad. Additionally we need to make sure that TheStore is
     // the only user of TheLoad.
@@ -1363,34 +1408,32 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
   // the load memory locations. So remove it from the ignored stores.
   if (IsMemCpy)
     IgnoredInsts.erase(TheStore);
+  MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);
   if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
                             StoreSizeSCEV, *AA, IgnoredInsts)) {
-    ORE.emit([&]() {
-      return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
-             << ore::NV("Inst", InstRemark) << " in "
-             << ore::NV("Function", TheStore->getFunction())
-             << " function will not be hoisted: "
-             << ore::NV("Reason", "The loop may access load location");
-    });
-    return Changed;
-  }
-  if (UseMemMove) {
-    // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr for
-    // negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
-    int64_t LoadOff = 0, StoreOff = 0;
-    const Value *BP1 = llvm::GetPointerBaseWithConstantOffset(
-        LoadBasePtr->stripPointerCasts(), LoadOff, *DL);
-    const Value *BP2 = llvm::GetPointerBaseWithConstantOffset(
-        StoreBasePtr->stripPointerCasts(), StoreOff, *DL);
-    int64_t LoadSize =
-        DL->getTypeSizeInBits(TheLoad->getType()).getFixedSize() / 8;
-    if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
+    if (!IsMemCpy) {
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad",
+                                        TheLoad)
+               << ore::NV("Inst", InstRemark) << " in "
+               << ore::NV("Function", TheStore->getFunction())
+               << " function will not be hoisted: "
+               << ore::NV("Reason", "The loop may access load location");
+      });
       return Changed;
-    if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
-        (IsNegStride && LoadOff + LoadSize > StoreOff))
+    }
+    // At this point loop may access load only for memcpy in same underlying
+    // object. If that's not the case bail out.
+    if (!Verifier.IsSameObject)
       return Changed;
   }
 
+  bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
+  if (UseMemMove)
+    if (!Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
+                                             IsMemCpy))
+      return Changed;
+
   if (avoidLIRForMultiBlockLoop())
     return Changed;
 
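
Note: the following is a minimal, self-contained sketch (not part of the patch) of the stride/overlap rule that the new MemmoveVerifier::loadAndStoreMayFormMemmove encodes, assuming the load and store already share the same underlying base object. The helper name mayFormMemmove and all offsets/sizes below are made-up for illustration.

// Sketch only: mirrors the stride/overlap conditions from the patch above.
// Offsets and sizes are in bytes, measured from the common base object.
#include <cstdint>
#include <iostream>

static bool mayFormMemmove(int64_t LoadOff, int64_t StoreOff, int64_t LoadSize,
                           int64_t StoreSize, bool IsNegStride, bool IsMemCpy) {
  if (IsMemCpy) {
    // Existing memcpy: the load must start strictly after the store for a
    // positive stride, or strictly before it for a negative stride.
    if ((!IsNegStride && LoadOff <= StoreOff) ||
        (IsNegStride && LoadOff >= StoreOff))
      return false;
  } else {
    // Plain load/store pair: both must move the same number of bytes and the
    // accessed ranges must not overlap in the direction of the stride.
    if (LoadSize != StoreSize)
      return false;
    if ((!IsNegStride && LoadOff < StoreOff + StoreSize) ||
        (IsNegStride && LoadOff + LoadSize > StoreOff))
      return false;
  }
  return true;
}

int main() {
  // Load 8 bytes ahead of a 4-byte store, positive stride: no overlap, OK.
  std::cout << mayFormMemmove(8, 0, 4, 4, /*IsNegStride=*/false,
                              /*IsMemCpy=*/false) << '\n'; // prints 1
  // Load only 2 bytes ahead of a 4-byte store: ranges overlap, reject.
  std::cout << mayFormMemmove(2, 0, 4, 4, /*IsNegStride=*/false,
                              /*IsMemCpy=*/false) << '\n'; // prints 0
  return 0;
}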