
Commit 5946696

[msan] Handle NEON vector load (#130457)
This adds an explicit handler for:
- llvm.aarch64.neon.ld1x2, llvm.aarch64.neon.ld1x3, llvm.aarch64.neon.ld1x4
- llvm.aarch64.neon.ld2, llvm.aarch64.neon.ld3, llvm.aarch64.neon.ld4
- llvm.aarch64.neon.ld2lane, llvm.aarch64.neon.ld3lane, llvm.aarch64.neon.ld4lane
- llvm.aarch64.neon.ld2r, llvm.aarch64.neon.ld3r, llvm.aarch64.neon.ld4r

instead of relying on the default strict handler.

Updates the tests from #125267.
1 parent c3e1633 commit 5946696
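At the source level, these intrinsics come from the NEON structure-load builtins in arm_neon.h. A minimal sketch of affected user code, assuming the usual AArch64 lowering of vld2q_s32 to @llvm.aarch64.neon.ld2.v4i32.p0 (function and variable names here are illustrative, not from the commit):

    #include <arm_neon.h>

    // Assumed lowering: on AArch64, clang compiles vld2q_s32 to a call to
    // @llvm.aarch64.neon.ld2.v4i32.p0, one of the intrinsics this commit
    // teaches MemorySanitizer to handle explicitly.
    int32x4x2_t load_even_odd(const int32_t *p) {
      return vld2q_s32(p); // reads 8 ints; even lanes -> val[0], odd -> val[1]
    }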

File tree

2 files changed: +1205 −923 lines changed


llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 95 additions & 4 deletions
@@ -4132,10 +4132,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
 
     CallInst *CI =
         IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
-    // The intrinsic may require floating-point but shadows can be arbitrary
-    // bit patterns, of which some would be interpreted as "invalid"
-    // floating-point values (NaN etc.); we assume the intrinsic will happily
-    // copy them.
+    // The AVX masked load intrinsics do not have integer variants. We use the
+    // floating-point variants, which will happily copy the shadows even if
+    // they are interpreted as "invalid" floating-point values (NaN etc.).
     setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
 
     if (!MS.TrackOrigins)
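For context on the reworded comment above: the AVX masked loads are float-typed at the source level as well. A minimal sketch, not part of this change (intrinsic from immintrin.h; names illustrative):

    #include <immintrin.h>

    // AVX masked load: lanes whose mask element has its top bit set are
    // loaded, the rest are zeroed. AVX1 provides only float/double flavors,
    // so MSan reuses the floating-point variant for the (integer) shadow and
    // bitcasts the result, as the comment in the hunk above describes.
    __m256 load_masked(const float *p, __m256i mask) {
      return _mm256_maskload_ps(p, mask);
    }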
@@ -4301,6 +4300,78 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
+  // Handle Arm NEON vector load intrinsics (vld*).
+  //
+  // The WithLane instructions (ld[234]lane) are similar to:
+  //   call {<4 x i32>, <4 x i32>, <4 x i32>}
+  //       @llvm.aarch64.neon.ld3lane.v4i32.p0
+  //           (<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 %lane, ptr
+  //            %A)
+  //
+  // The non-WithLane instructions (ld[234], ld1x[234], ld[234]r) are similar
+  // to:
+  //   call {<8 x i8>, <8 x i8>} @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
+  void handleNEONVectorLoad(IntrinsicInst &I, bool WithLane) {
+    unsigned int numArgs = I.arg_size();
+
+    // Return type is a struct of vectors of integers or floating-point
+    assert(I.getType()->isStructTy());
+    [[maybe_unused]] StructType *RetTy = cast<StructType>(I.getType());
+    assert(RetTy->getNumElements() > 0);
+    assert(RetTy->getElementType(0)->isIntOrIntVectorTy() ||
+           RetTy->getElementType(0)->isFPOrFPVectorTy());
+    for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
+      assert(RetTy->getElementType(i) == RetTy->getElementType(0));
+
+    if (WithLane) {
+      // 2, 3 or 4 vectors, plus lane number, plus input pointer
+      assert(4 <= numArgs && numArgs <= 6);
+
+      // Return type is a struct of the input vectors
+      assert(RetTy->getNumElements() + 2 == numArgs);
+      for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
+        assert(I.getArgOperand(i)->getType() == RetTy->getElementType(0));
+    } else {
+      assert(numArgs == 1);
+    }
+
+    IRBuilder<> IRB(&I);
+
+    SmallVector<Value *, 6> ShadowArgs;
+    if (WithLane) {
+      for (unsigned int i = 0; i < numArgs - 2; i++)
+        ShadowArgs.push_back(getShadow(I.getArgOperand(i)));
+
+      // Lane number, passed verbatim
+      Value *LaneNumber = I.getArgOperand(numArgs - 2);
+      ShadowArgs.push_back(LaneNumber);
+
+      // TODO: blend shadow of lane number into output shadow?
+      insertShadowCheck(LaneNumber, &I);
+    }
+
+    Value *Src = I.getArgOperand(numArgs - 1);
+    assert(Src->getType()->isPointerTy() && "Source is not a pointer!");
+
+    Type *SrcShadowTy = getShadowTy(Src);
+    auto [SrcShadowPtr, SrcOriginPtr] =
+        getShadowOriginPtr(Src, IRB, SrcShadowTy, Align(1), /*isStore*/ false);
+    ShadowArgs.push_back(SrcShadowPtr);
+
+    // The NEON vector load instructions handled by this function all have
+    // integer variants. It is easier to use those rather than trying to cast
+    // a struct of vectors of floats into a struct of vectors of integers.
+    CallInst *CI =
+        IRB.CreateIntrinsic(getShadowTy(&I), I.getIntrinsicID(), ShadowArgs);
+    setShadow(&I, CI);
+
+    if (!MS.TrackOrigins)
+      return;
+
+    Value *PtrSrcOrigin = IRB.CreateLoad(MS.OriginTy, SrcOriginPtr);
+    setOrigin(&I, PtrSrcOrigin);
+  }
+
   /// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
   /// and vst{2,3,4}lane).
   ///
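The WithLane path in the hunk above forwards the shadows of the pass-through vectors and checks only that the lane index itself is initialized. At the source level, the lane variants look like this, a minimal sketch assuming vld2q_lane_s32 lowers to @llvm.aarch64.neon.ld2lane.v4i32.p0 on AArch64 (names illustrative):

    #include <arm_neon.h>

    // Lane load: replaces lane 1 of each vector in `acc` with freshly loaded
    // data while the other lanes pass through, which is why the handler
    // above forwards the input vectors' shadows and shadow-checks the lane
    // index (which must be a compile-time constant).
    int32x4x2_t reload_lane1(const int32_t *p, int32x4x2_t acc) {
      return vld2q_lane_s32(p, acc, 1);
    }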
@@ -5011,6 +5082,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/true);
       break;
 
+    case Intrinsic::aarch64_neon_ld1x2:
+    case Intrinsic::aarch64_neon_ld1x3:
+    case Intrinsic::aarch64_neon_ld1x4:
+    case Intrinsic::aarch64_neon_ld2:
+    case Intrinsic::aarch64_neon_ld3:
+    case Intrinsic::aarch64_neon_ld4:
+    case Intrinsic::aarch64_neon_ld2r:
+    case Intrinsic::aarch64_neon_ld3r:
+    case Intrinsic::aarch64_neon_ld4r: {
+      handleNEONVectorLoad(I, /*WithLane=*/false);
+      break;
+    }
+
+    case Intrinsic::aarch64_neon_ld2lane:
+    case Intrinsic::aarch64_neon_ld3lane:
+    case Intrinsic::aarch64_neon_ld4lane: {
+      handleNEONVectorLoad(I, /*WithLane=*/true);
+      break;
+    }
+
     // Saturating extract narrow
     case Intrinsic::aarch64_neon_sqxtn:
     case Intrinsic::aarch64_neon_sqxtun:
