-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[MemCpyOpt] handle memcpy from memset in more cases #140954
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1367,8 +1367,9 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, | |
return true; | ||
} | ||
|
||
/// Determine whether the instruction has undefined content for the given Size, | ||
/// either because it was freshly alloca'd or started its lifetime. | ||
/// Determine whether the pointer V had only undefined content from Def up to | ||
/// the given Size, either because it was freshly alloca'd or started its | ||
/// lifetime. | ||
static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V, | ||
MemoryDef *Def, Value *Size) { | ||
if (MSSA->isLiveOnEntryDef(Def)) | ||
|
@@ -1403,6 +1404,24 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V, | |
return false; | ||
} | ||
|
||
static bool coversInputFully(MemorySSA *MSSA, MemCpyInst *MemCpy, | ||
MemIntrinsic *MemSrc, BatchAAResults &BAA) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The name of this helper seems quite misleading. The corresponding boolean was previously called CanReduceSize, which is what this is actually checking (can we reduce CopySize to the size written by MemSrc). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't really get what followup change you have in mind, but if you think canReduceSize is inaccurate, then maybe something along the lines of overreadIsUndef? I don't think the new name inputFullyCoveredBySrc is really better than the old, because this function is specifically detecting the case where the input is not fully covered by src (it is partially covered by src and partially by undef initialization)... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The CanReduceSize bool is actually a property of having both sizes be constants (the other conditional here). If they aren't constants, then you cannot reduce the size, though this query and this optimization are still okay |
||
// If the memcpy is larger than the previous, but the memory was undef prior | ||
// to that, we can just ignore the tail. Technically we're only | ||
// interested in the bytes from 0..MemSrcOffset and | ||
// MemSrcLength+MemSrcOffset..CopySize here, but as we can't easily represent | ||
// this location, we use the full 0..CopySize range. | ||
Value *CopySize = MemCpy->getLength(); | ||
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy); | ||
MemoryUseOrDef *MemSrcAccess = MSSA->getMemoryAccess(MemSrc); | ||
MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( | ||
MemSrcAccess->getDefiningAccess(), MemCpyLoc, BAA); | ||
if (auto *MD = dyn_cast<MemoryDef>(Clobber)) | ||
if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize)) | ||
return true; | ||
return false; | ||
} | ||
|
||
/// Transform memcpy to memset when its source was just memset. | ||
/// In other words, turn: | ||
/// \code | ||
|
@@ -1418,19 +1437,26 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V, | |
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, | ||
MemSetInst *MemSet, | ||
BatchAAResults &BAA) { | ||
// Make sure that memcpy(..., memset(...), ...), that is we are memsetting and | ||
// memcpying from the same address. Otherwise it is hard to reason about. | ||
if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource())) | ||
dianqk marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return false; | ||
|
||
Value *MemSetSize = MemSet->getLength(); | ||
Value *CopySize = MemCpy->getLength(); | ||
|
||
if (MemSetSize != CopySize) { | ||
// Make sure the memcpy doesn't read any more than what the memset wrote. | ||
// Don't worry about sizes larger than i64. | ||
int64_t MOffset = 0; | ||
dianqk marked this conversation as resolved.
Show resolved
Hide resolved
|
||
const DataLayout &DL = MemCpy->getModule()->getDataLayout(); | ||
// We can only transforms memcpy's where the dest of one is the source of the | ||
// other, or the memory transfer has a known offset from the memset. | ||
if (MemCpy->getSource() != MemSet->getDest()) { | ||
std::optional<int64_t> Offset = | ||
MemCpy->getSource()->getPointerOffsetFrom(MemSet->getDest(), DL); | ||
if (!Offset || *Offset < 0) | ||
return false; | ||
MOffset = *Offset; | ||
} | ||
|
||
// A known memset size is required. | ||
MaybeAlign MDestAlign = MemCpy->getDestAlign(); | ||
nikic marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (MOffset != 0 || MemSetSize != CopySize) { | ||
dianqk marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Make sure the memcpy doesn't read any more than what the memset wrote, | ||
// other than undef. Don't worry about sizes larger than i64. A known memset | ||
// size is required. | ||
auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize); | ||
if (!CMemSetSize) | ||
return false; | ||
|
@@ -1439,23 +1465,18 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, | |
auto *CCopySize = dyn_cast<ConstantInt>(CopySize); | ||
if (!CCopySize) | ||
return false; | ||
if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we also reduce the diff here? How about just modifying the comments? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the helper function makes this function more readable, and the helper function should be more generally useful, since it implements a check that other functions in this file should also implement. In particular, I'm wanting to use this in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In my opinion, making a new function is the more general approach when sharing code or when a piece of code grows significantly in size. --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1790,6 +1790,9 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
}
}
}
+ if (truncateUndefs(M, MI)) {
+ return true;
+ }
if (auto *MDep = dyn_cast<MemCpyInst>(MI))
if (processMemCpyMemCpyDependence(M, MDep, BAA))
return true;
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe this helper function should also be used in |
||
// If the memcpy is larger than the memset, but the memory was undef prior | ||
// to the memset, we can just ignore the tail. Technically we're only | ||
// interested in the bytes from MemSetSize..CopySize here, but as we can't | ||
// easily represent this location, we use the full 0..CopySize range. | ||
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy); | ||
bool CanReduceSize = false; | ||
MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet); | ||
MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( | ||
MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA); | ||
if (auto *MD = dyn_cast<MemoryDef>(Clobber)) | ||
if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize)) | ||
CanReduceSize = true; | ||
|
||
if (!CanReduceSize) | ||
if (CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()) { | ||
if (!coversInputFully(MSSA, MemCpy, MemSet, BAA)) | ||
return false; | ||
CopySize = MemSetSize; | ||
// Clip the memcpy to the bounds of the memset | ||
if (MOffset == 0) | ||
CopySize = MemSetSize; | ||
else | ||
CopySize = | ||
ConstantInt::get(CopySize->getType(), | ||
CMemSetSize->getZExtValue() <= (uint64_t)MOffset | ||
? 0 | ||
: CMemSetSize->getZExtValue() - MOffset); | ||
} | ||
} | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.