@@ -1367,8 +1367,9 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1367
1367
return true ;
1368
1368
}
1369
1369
1370
- // / Determine whether the instruction has undefined content for the given Size,
1371
- // / either because it was freshly alloca'd or started its lifetime.
1370
+ // / Determine whether the pointer V had only undefined content from Def up to
1371
+ // / the given Size, either because it was freshly alloca'd or started its
1372
+ // / lifetime.
1372
1373
static bool hasUndefContents (MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1373
1374
MemoryDef *Def, Value *Size) {
1374
1375
if (MSSA->isLiveOnEntryDef (Def))
@@ -1403,6 +1404,24 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1403
1404
return false ;
1404
1405
}
1405
1406
1407
+ static bool coversInputFully (MemorySSA *MSSA, MemCpyInst *MemCpy,
1408
+ MemIntrinsic *MemSrc, BatchAAResults &BAA) {
1409
+ // If the memcpy is larger than the previous intrinsic (MemSrc), but the memory was undef prior
1410
+ // to that, we can just ignore the tail. Technically we're only
1411
+ // interested in the bytes from 0..MemSrcOffset and
1412
+ // MemSrcLength+MemSrcOffset..CopySize here, but as we can't easily represent
1413
+ // this location, we use the full 0..CopySize range.
1414
+ Value *CopySize = MemCpy->getLength ();
1415
+ MemoryLocation MemCpyLoc = MemoryLocation::getForSource (MemCpy);
1416
+ MemoryUseOrDef *MemSrcAccess = MSSA->getMemoryAccess (MemSrc);
1417
+ MemoryAccess *Clobber = MSSA->getWalker ()->getClobberingMemoryAccess (
1418
+ MemSrcAccess->getDefiningAccess (), MemCpyLoc, BAA);
1419
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1420
+ if (hasUndefContents (MSSA, BAA, MemCpy->getSource (), MD, CopySize))
1421
+ return true ;
1422
+ return false ;
1423
+ }
1424
+
1406
1425
// / Transform memcpy to memset when its source was just memset.
1407
1426
// / In other words, turn:
1408
1427
// / \code
@@ -1418,51 +1437,63 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1418
1437
bool MemCpyOptPass::performMemCpyToMemSetOptzn (MemCpyInst *MemCpy,
1419
1438
MemSetInst *MemSet,
1420
1439
BatchAAResults &BAA) {
1421
- // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
1422
- // memcpying from the same address. Otherwise it is hard to reason about.
1423
- if (!BAA.isMustAlias (MemSet->getRawDest (), MemCpy->getRawSource ()))
1424
- return false ;
1425
-
1426
1440
Value *MemSetSize = MemSet->getLength ();
1427
1441
Value *CopySize = MemCpy->getLength ();
1428
1442
1429
- if (MemSetSize != CopySize) {
1430
- // Make sure the memcpy doesn't read any more than what the memset wrote.
1431
- // Don't worry about sizes larger than i64.
1432
-
1433
- // A known memset size is required.
1434
- auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1435
- if (!CMemSetSize)
1443
+ int64_t MOffset = 0 ;
1444
+ const DataLayout &DL = MemCpy->getModule ()->getDataLayout ();
1445
+ // We can only transform the memcpy when its source is the dest of the
1446
+ // memset, or when the two pointers have a known offset.
1447
+ if (MemCpy->getSource () != MemSet->getDest ()) {
1448
+ std::optional<int64_t > Offset =
1449
+ MemCpy->getSource ()->getPointerOffsetFrom (MemSet->getDest (), DL);
1450
+ if (!Offset)
1436
1451
return false ;
1452
+ MOffset = *Offset;
1453
+ }
1437
1454
1438
- // A known memcpy size is also required.
1455
+ MaybeAlign MDestAlign = MemCpy->getDestAlign ();
1456
+ int64_t MOffsetAligned = MDestAlign.valueOrOne ().value () > 1 && MOffset < 0 ? -(-MOffset & ~(MDestAlign.valueOrOne ().value () - 1 )) : MOffset; // Compute the MOffset that keeps MDest aligned (truncate towards zero)
1457
+ if (MOffset != 0 || MemSetSize != CopySize) {
1458
+ // Make sure the memcpy doesn't read any more than what the memset wrote, other than undef.
1459
+ auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1439
1460
auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1440
- if (!CCopySize)
1441
- return false ;
1442
- if (CCopySize->getZExtValue () > CMemSetSize->getZExtValue ()) {
1443
- // If the memcpy is larger than the memset, but the memory was undef prior
1444
- // to the memset, we can just ignore the tail. Technically we're only
1445
- // interested in the bytes from MemSetSize..CopySize here, but as we can't
1446
- // easily represent this location, we use the full 0..CopySize range.
1447
- MemoryLocation MemCpyLoc = MemoryLocation::getForSource (MemCpy);
1448
- bool CanReduceSize = false ;
1449
- MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess (MemSet);
1450
- MemoryAccess *Clobber = MSSA->getWalker ()->getClobberingMemoryAccess (
1451
- MemSetAccess->getDefiningAccess (), MemCpyLoc, BAA);
1452
- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1453
- if (hasUndefContents (MSSA, BAA, MemCpy->getSource (), MD, CopySize))
1454
- CanReduceSize = true ;
1455
-
1456
- if (!CanReduceSize)
1461
+ // Don't worry about sizes larger than i64.
1462
+ if (!CMemSetSize || !CCopySize || MOffset < 0 ||
1463
+ CCopySize->getZExtValue () + MOffset > CMemSetSize->getZExtValue ()) {
1464
+ if (!coversInputFully (MSSA, MemCpy, MemSet, BAA))
1457
1465
return false ;
1458
- CopySize = MemSetSize;
1466
+
1467
+ if (CMemSetSize && CCopySize) {
1468
+ // If both have constant sizes and offsets, clip the memcpy to the bounds of the memset if applicable.
1469
+ if (CCopySize->getZExtValue () + std::abs (MOffset) > CMemSetSize->getZExtValue ()) {
1470
+ if (MOffsetAligned == 0 || (MOffset < 0 && CCopySize->getZExtValue () + MOffset > CMemSetSize->getZExtValue ()))
1471
+ CopySize = MemSetSize;
1472
+ else
1473
+ CopySize = ConstantInt::get (CopySize->getType (), std::max ((int64_t )0 , (int64_t )(CMemSetSize->getZExtValue () - std::abs (MOffsetAligned))));
1474
+ }
1475
+ else if (MOffsetAligned < 0 ) {
1476
+ // Both sizes are known here and the copy does not extend past the memset size, but if MOffsetAligned is negative, still make sure to clip the new memset
1477
+ CopySize = ConstantInt::get (CopySize->getType (), CCopySize->getZExtValue () + MOffsetAligned);
1478
+ }
1479
+ }
1480
+ else if (CCopySize && MOffsetAligned < 0 ) {
1481
+ // Even if CMemSetSize isn't known, if MOffsetAligned is negative, we can still clip the new memset
1482
+ CopySize = ConstantInt::get (CopySize->getType (), CCopySize->getZExtValue () + MOffsetAligned);
1483
+ }
1484
+ else {
1485
+ MOffsetAligned = 0 ;
1486
+ }
1459
1487
}
1460
1488
}
1461
1489
1462
1490
IRBuilder<> Builder (MemCpy);
1491
+ Value *MDest = MemCpy->getRawDest ();
1492
+ if (MOffsetAligned < 0 )
1493
+ MDest = Builder.CreateInBoundsPtrAdd (MDest, Builder.getInt64 (-MOffsetAligned));
1463
1494
Instruction *NewM =
1464
- Builder.CreateMemSet (MemCpy-> getRawDest () , MemSet->getOperand (1 ),
1465
- CopySize, MemCpy-> getDestAlign () );
1495
+ Builder.CreateMemSet (MDest , MemSet->getOperand (1 ),
1496
+ CopySize, MDestAlign );
1466
1497
auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess (MemCpy));
1467
1498
auto *NewAccess = MSSAU->createMemoryAccessAfter (NewM, nullptr , LastDef);
1468
1499
MSSAU->insertDef (cast<MemoryDef>(NewAccess), /* RenameUses=*/ true );
@@ -1683,7 +1714,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1683
1714
I->setMetadata (LLVMContext::MD_tbaa_struct, nullptr );
1684
1715
}
1685
1716
1686
- LLVM_DEBUG (dbgs () << " Stack Move: Performed staack -move optimization\n " );
1717
+ LLVM_DEBUG (dbgs () << " Stack Move: Performed stack -move optimization\n " );
1687
1718
NumStackMove++;
1688
1719
return true ;
1689
1720
}
0 commit comments