@@ -107,6 +107,9 @@ struct MemsetRange {
107
107
108
108
} // end anonymous namespace
109
109
110
// Forward declaration of a file-local helper so that
// MemCpyOptPass::processMemCpyMemCpyDependence can call it before its
// definition (the definition is not part of this hunk).
// The visible caller invokes it as overreadUndefContents(MSSA, M, MDep, BAA)
// when the later memcpy would read past the end of the earlier one, and gives
// up on forwarding if it returns false. Presumably a true result means the
// over-read bytes are undefined -- confirm against the definition.
+ static bool overreadUndefContents (MemorySSA *MSSA, MemCpyInst *MemCpy,
111
+ MemIntrinsic *MemSrc, BatchAAResults &BAA);
112
+
110
113
bool MemsetRange::isProfitableToUseMemset (const DataLayout &DL) const {
111
114
// If we found more than 4 stores to merge or 16 bytes, use memset.
112
115
if (TheStores.size () >= 4 || End - Start >= 16 )
@@ -1129,14 +1132,29 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1129
1132
MForwardOffset = *Offset;
1130
1133
}
1131
1134
1132
- // The length of the memcpy's must be the same, or the preceding one
1133
- // must be larger than the following one.
1134
- if (MForwardOffset != 0 || MDep->getLength () != M->getLength ()) {
1135
+ Value *CopyLength = M->getLength ();
1136
+
1137
+ // The length of the memcpy's must be the same, or the preceding one must be
1138
+ // larger than the following one, or the contents of the overread must be
1139
+ // undefined bytes of a defined size.
1140
+ if (MForwardOffset != 0 || MDep->getLength () != CopyLength) {
1135
1141
auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength ());
1136
- auto *MLen = dyn_cast<ConstantInt>(M->getLength ());
1137
- if (!MDepLen || !MLen ||
1138
- MDepLen->getZExtValue () < MLen->getZExtValue () + MForwardOffset)
1142
+ auto *MLen = dyn_cast<ConstantInt>(CopyLength);
1143
+ // This could be converted to a runtime test (%CopyLength =
1144
+ // min(max(0, MDepLen - MForwardOffset), MLen)), but it is
1145
+ // unclear if that is useful
1146
+ if (!MDepLen || !MLen)
1139
1147
return false ;
1148
+ if (MDepLen->getZExtValue () < MLen->getZExtValue () + MForwardOffset) {
1149
+ if (!overreadUndefContents (MSSA, M, MDep, BAA))
1150
+ return false ;
1151
+ if (MDepLen->getZExtValue () <= (uint64_t )MForwardOffset)
1152
+ return false ; // Should not reach here (there is obviously no aliasing
1153
+ // with MDep), so just bail in case it had incomplete info
1154
+ // somehow
1155
+ CopyLength = ConstantInt::get (CopyLength->getType (),
1156
+ MDepLen->getZExtValue () - MForwardOffset);
1157
+ }
1140
1158
}
1141
1159
1142
1160
IRBuilder<> Builder (M);
@@ -1152,9 +1170,13 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1152
1170
eraseInstruction (NewCopySource);
1153
1171
});
1154
1172
MaybeAlign CopySourceAlign = MDep->getSourceAlign ();
1155
- // We just need to calculate the actual size of the copy.
1156
- auto MCopyLoc = MemoryLocation::getForSource (MDep).getWithNewSize (
1157
- MemoryLocation::getForSource (M).Size );
1173
+ auto MCopyLoc = MemoryLocation::getForSource (MDep);
1174
+ // Truncate the size of the MDep access to just the bytes read
1175
+ if (MDep->getLength () != CopyLength) {
1176
+ auto *ConstLength = cast<ConstantInt>(CopyLength);
1177
+ MCopyLoc = MCopyLoc.getWithNewSize (
1178
+ LocationSize::precise (ConstLength->getZExtValue ()));
1179
+ }
1158
1180
1159
1181
// When the forwarding offset is greater than 0, we transform
1160
1182
// memcpy(d1 <- s1)
@@ -1223,20 +1245,18 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1223
1245
// example we could be moving from movaps -> movq on x86.
1224
1246
Instruction *NewM;
1225
1247
if (UseMemMove)
1226
- NewM =
1227
- Builder.CreateMemMove (M->getDest (), M->getDestAlign (), CopySource,
1228
- CopySourceAlign, M->getLength (), M->isVolatile ());
1248
+ NewM = Builder.CreateMemMove (M->getDest (), M->getDestAlign (), CopySource,
1249
+ CopySourceAlign, CopyLength, M->isVolatile ());
1229
1250
else if (M->isForceInlined ())
1230
1251
// llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1231
1252
// never allowed since that would allow the latter to be lowered as a call
1232
1253
// to an external function.
1233
1254
NewM = Builder.CreateMemCpyInline (M->getDest (), M->getDestAlign (),
1234
- CopySource, CopySourceAlign,
1235
- M->getLength (), M-> isVolatile ());
1255
+ CopySource, CopySourceAlign, CopyLength,
1256
+ M->isVolatile ());
1236
1257
else
1237
1258
NewM = Builder.CreateMemCpy (M->getDest (), M->getDestAlign (), CopySource,
1238
- CopySourceAlign, M->getLength (),
1239
- M->isVolatile ());
1259
+ CopySourceAlign, CopyLength, M->isVolatile ());
1240
1260
1241
1261
NewM->copyMetadata (*M, LLVMContext::MD_DIAssignID);
1242
1262
0 commit comments