Skip to content

Commit f79e6a8

Browse files
committed
[MemCpyOptimizer] Simplify API of processStore and processMem* functions
Previously these functions either returned a "changed" flag or a "repeat instruction" flag, and could also modify an iterator to control which instruction would be processed next. Simplify this by always returning a "changed" flag, and handling all of the "repeat instruction" functionality by modifying the iterator. No functional change intended except in this case: // If the source and destination of the memcpy are the same, then zap it. ... where the previous code failed to process the instruction after the zapped memcpy. Differential Revision: https://reviews.llvm.org/D81540
1 parent 5951ff4 commit f79e6a8

File tree

2 files changed

+70
-59
lines changed

2 files changed

+70
-59
lines changed

llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,12 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
5959
bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
6060
bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
6161
bool processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI);
62-
bool processMemMove(MemMoveInst *M);
62+
bool processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI);
6363
bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
6464
uint64_t cpyLen, Align cpyAlign, CallInst *C);
65-
bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
66-
bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep);
67-
bool performMemCpyToMemSetOptzn(MemCpyInst *M, MemSetInst *MDep);
65+
Instruction *processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
66+
Instruction *processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep);
67+
Instruction *performMemCpyToMemSetOptzn(MemCpyInst *M, MemSetInst *MDep);
6868
bool processByValArgument(CallBase &CB, unsigned ArgNo);
6969
Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
7070
Value *ByteVal);

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 66 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,8 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
501501
return true;
502502
}
503503

504+
/// If changes are made, return true and set BBI to the next instruction to
505+
/// visit.
504506
bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
505507
if (!SI->isSimple()) return false;
506508

@@ -578,7 +580,6 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
578580
LI->eraseFromParent();
579581
++NumMemCpyInstr;
580582

581-
// Make sure we do not invalidate the iterator.
582583
BBI = M->getIterator();
583584
return true;
584585
}
@@ -642,7 +643,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
642643
if (Value *ByteVal = isBytewiseValue(V, DL)) {
643644
if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
644645
ByteVal)) {
645-
BBI = I->getIterator(); // Don't invalidate iterator.
646+
BBI = I->getIterator();
646647
return true;
647648
}
648649

@@ -662,7 +663,6 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
662663
SI->eraseFromParent();
663664
NumMemSetInfer++;
664665

665-
// Make sure we do not invalidate the iterator.
666666
BBI = M->getIterator();
667667
return true;
668668
}
@@ -671,13 +671,15 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
671671
return false;
672672
}
673673

674+
/// If changes are made, return true and set BBI to the next instruction to
675+
/// visit.
674676
bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
675677
// See if there is another memset or store neighboring this memset which
676678
// allows us to widen out the memset to do a single larger store.
677679
if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
678680
if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
679681
MSI->getValue())) {
680-
BBI = I->getIterator(); // Don't invalidate iterator.
682+
BBI = I->getIterator();
681683
return true;
682684
}
683685
return false;
@@ -886,27 +888,27 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
886888

887889
/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
888890
/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
889-
bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
890-
MemCpyInst *MDep) {
891+
Instruction *MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
892+
MemCpyInst *MDep) {
891893
// We can only transforms memcpy's where the dest of one is the source of the
892894
// other.
893895
if (M->getSource() != MDep->getDest() || MDep->isVolatile())
894-
return false;
896+
return nullptr;
895897

896898
// If dep instruction is reading from our current input, then it is a noop
897899
// transfer and substituting the input won't change this instruction. Just
898900
// ignore the input and let someone else zap MDep. This handles cases like:
899901
// memcpy(a <- a)
900902
// memcpy(b <- a)
901903
if (M->getSource() == MDep->getSource())
902-
return false;
904+
return nullptr;
903905

904906
// Second, the length of the memcpy's must be the same, or the preceding one
905907
// must be larger than the following one.
906908
ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
907909
ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
908910
if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
909-
return false;
911+
return nullptr;
910912

911913
AliasAnalysis &AA = LookupAliasAnalysis();
912914

@@ -926,7 +928,7 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
926928
MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
927929
M->getIterator(), M->getParent());
928930
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
929-
return false;
931+
return nullptr;
930932

931933
// If the dest of the second might alias the source of the first, then the
932934
// source and dest might overlap. We still want to eliminate the intermediate
@@ -943,20 +945,21 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
943945
// TODO: Is this worth it if we're creating a less aligned memcpy? For
944946
// example we could be moving from movaps -> movq on x86.
945947
IRBuilder<> Builder(M);
948+
Instruction *MC;
946949
if (UseMemMove)
947-
Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
948-
MDep->getRawSource(), MDep->getSourceAlign(),
949-
M->getLength(), M->isVolatile());
950+
MC = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
951+
MDep->getRawSource(), MDep->getSourceAlign(),
952+
M->getLength(), M->isVolatile());
950953
else
951-
Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
952-
MDep->getRawSource(), MDep->getSourceAlign(),
953-
M->getLength(), M->isVolatile());
954+
MC = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
955+
MDep->getRawSource(), MDep->getSourceAlign(),
956+
M->getLength(), M->isVolatile());
954957

955958
// Remove the instruction we're replacing.
956959
MD->removeInstruction(M);
957960
M->eraseFromParent();
958961
++NumMemCpyInstr;
959-
return true;
962+
return MC;
960963
}
961964

962965
/// We've found that the (upward scanning) memory dependence of \p MemCpy is
@@ -973,18 +976,18 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
973976
/// memcpy(dst, src, src_size);
974977
/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
975978
/// \endcode
976-
bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
977-
MemSetInst *MemSet) {
979+
Instruction *MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
980+
MemSetInst *MemSet) {
978981
// We can only transform memset/memcpy with the same destination.
979982
if (MemSet->getDest() != MemCpy->getDest())
980-
return false;
983+
return nullptr;
981984

982985
// Check that there are no other dependencies on the memset destination.
983986
MemDepResult DstDepInfo =
984987
MD->getPointerDependencyFrom(MemoryLocation::getForDest(MemSet), false,
985988
MemCpy->getIterator(), MemCpy->getParent());
986989
if (DstDepInfo.getInst() != MemSet)
987-
return false;
990+
return nullptr;
988991

989992
// Use the same i8* dest as the memcpy, killing the memset dest if different.
990993
Value *Dest = MemCpy->getRawDest();
@@ -1016,14 +1019,14 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
10161019
Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
10171020
Value *MemsetLen = Builder.CreateSelect(
10181021
Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1019-
Builder.CreateMemSet(
1022+
auto *MS = Builder.CreateMemSet(
10201023
Builder.CreateGEP(Dest->getType()->getPointerElementType(), Dest,
10211024
SrcSize),
10221025
MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
10231026

10241027
MD->removeInstruction(MemSet);
10251028
MemSet->eraseFromParent();
1026-
return true;
1029+
return MS;
10271030
}
10281031

10291032
/// Determine whether the instruction has undefined content for the given Size,
@@ -1055,19 +1058,19 @@ static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
10551058
/// When dst2_size <= dst1_size.
10561059
///
10571060
/// The \p MemCpy must have a Constant length.
1058-
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1059-
MemSetInst *MemSet) {
1061+
Instruction *MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1062+
MemSetInst *MemSet) {
10601063
AliasAnalysis &AA = LookupAliasAnalysis();
10611064

10621065
// Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
10631066
// memcpying from the same address. Otherwise it is hard to reason about.
10641067
if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1065-
return false;
1068+
return nullptr;
10661069

10671070
// A known memset size is required.
10681071
ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
10691072
if (!MemSetSize)
1070-
return false;
1073+
return nullptr;
10711074

10721075
// Make sure the memcpy doesn't read any more than what the memset wrote.
10731076
// Don't worry about sizes larger than i64.
@@ -1083,54 +1086,62 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
10831086
if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
10841087
CopySize = MemSetSize;
10851088
else
1086-
return false;
1089+
return nullptr;
10871090
}
10881091

10891092
IRBuilder<> Builder(MemCpy);
1090-
Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), CopySize,
1091-
MaybeAlign(MemCpy->getDestAlignment()));
1092-
return true;
1093+
return Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1094+
CopySize, MaybeAlign(MemCpy->getDestAlignment()));
10931095
}
10941096

10951097
/// Perform simplification of memcpy's. If we have memcpy A
10961098
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
10971099
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
10981100
/// circumstances). This allows later passes to remove the first memcpy
10991101
/// altogether.
1102+
/// If changes are made, return true and set BBI to the next instruction to
1103+
/// visit.
11001104
bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
11011105
// We can only optimize non-volatile memcpy's.
11021106
if (M->isVolatile()) return false;
11031107

11041108
// If the source and destination of the memcpy are the same, then zap it.
11051109
if (M->getSource() == M->getDest()) {
1106-
++BBI;
11071110
MD->removeInstruction(M);
11081111
M->eraseFromParent();
11091112
return true;
11101113
}
11111114

11121115
// If copying from a constant, try to turn the memcpy into a memset.
1113-
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
1114-
if (GV->isConstant() && GV->hasDefinitiveInitializer())
1116+
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource())) {
1117+
if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
11151118
if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
11161119
M->getModule()->getDataLayout())) {
11171120
IRBuilder<> Builder(M);
1118-
Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
1119-
MaybeAlign(M->getDestAlignment()), false);
1121+
auto *MS =
1122+
Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
1123+
MaybeAlign(M->getDestAlignment()), false);
11201124
MD->removeInstruction(M);
11211125
M->eraseFromParent();
11221126
++NumCpyToSet;
1127+
BBI = MS->getIterator();
11231128
return true;
11241129
}
1130+
}
1131+
}
11251132

11261133
MemDepResult DepInfo = MD->getDependency(M);
11271134

11281135
// Try to turn a partially redundant memset + memcpy into
11291136
// memcpy + smaller memset. We don't need the memcpy size for this.
1130-
if (DepInfo.isClobber())
1131-
if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
1132-
if (processMemSetMemCpyDependence(M, MDep))
1137+
if (DepInfo.isClobber()) {
1138+
if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst())) {
1139+
if (auto *MS = processMemSetMemCpyDependence(M, MDep)) {
1140+
BBI = MS->getIterator();
11331141
return true;
1142+
}
1143+
}
1144+
}
11341145

11351146
// The optimizations after this point require the memcpy size.
11361147
ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
@@ -1163,8 +1174,13 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
11631174
SrcLoc, true, M->getIterator(), M->getParent());
11641175

11651176
if (SrcDepInfo.isClobber()) {
1166-
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
1167-
return processMemCpyMemCpyDependence(M, MDep);
1177+
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst())) {
1178+
if (auto *MC = processMemCpyMemCpyDependence(M, MDep)) {
1179+
BBI = MC->getIterator();
1180+
return true;
1181+
}
1182+
return false;
1183+
}
11681184
} else if (SrcDepInfo.isDef()) {
11691185
if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
11701186
MD->removeInstruction(M);
@@ -1176,10 +1192,11 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
11761192

11771193
if (SrcDepInfo.isClobber())
11781194
if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
1179-
if (performMemCpyToMemSetOptzn(M, MDep)) {
1195+
if (auto *MS = performMemCpyToMemSetOptzn(M, MDep)) {
11801196
MD->removeInstruction(M);
11811197
M->eraseFromParent();
11821198
++NumCpyToSet;
1199+
BBI = MS->getIterator();
11831200
return true;
11841201
}
11851202

@@ -1188,7 +1205,9 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
11881205

11891206
/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
11901207
/// not to alias.
1191-
bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1208+
/// If changes are made, return true and set BBI to the next instruction to
1209+
/// visit.
1210+
bool MemCpyOptPass::processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI) {
11921211
AliasAnalysis &AA = LookupAliasAnalysis();
11931212

11941213
if (!TLI->has(LibFunc_memmove))
@@ -1214,6 +1233,7 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
12141233
MD->removeInstruction(M);
12151234

12161235
++NumMoveToCpy;
1236+
BBI = M->getIterator();
12171237
return true;
12181238
}
12191239

@@ -1316,28 +1336,19 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
13161336
// Avoid invalidating the iterator.
13171337
Instruction *I = &*BI++;
13181338

1319-
bool RepeatInstruction = false;
1320-
13211339
if (StoreInst *SI = dyn_cast<StoreInst>(I))
13221340
MadeChange |= processStore(SI, BI);
13231341
else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
1324-
RepeatInstruction = processMemSet(M, BI);
1342+
MadeChange = processMemSet(M, BI);
13251343
else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
1326-
RepeatInstruction = processMemCpy(M, BI);
1344+
MadeChange = processMemCpy(M, BI);
13271345
else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
1328-
RepeatInstruction = processMemMove(M);
1346+
MadeChange = processMemMove(M, BI);
13291347
else if (auto *CB = dyn_cast<CallBase>(I)) {
13301348
for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
13311349
if (CB->isByValArgument(i))
13321350
MadeChange |= processByValArgument(*CB, i);
13331351
}
1334-
1335-
// Reprocess the instruction if desired.
1336-
if (RepeatInstruction) {
1337-
if (BI != BB.begin())
1338-
--BI;
1339-
MadeChange = true;
1340-
}
13411352
}
13421353
}
13431354

0 commit comments

Comments
 (0)