[msan] Handle NEON vector load #130457

Merged: 5 commits, Mar 20, 2025
99 changes: 95 additions & 4 deletions llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4073,10 +4073,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {

CallInst *CI =
IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
- // The intrinsic may require floating-point but shadows can be arbitrary
- // bit patterns, of which some would be interpreted as "invalid"
- // floating-point values (NaN etc.); we assume the intrinsic will happily
- // copy them.
+ // The AVX masked load intrinsics do not have integer variants. We use the
+ // floating-point variants, which will happily copy the shadows even if
+ // they are interpreted as "invalid" floating-point values (NaN etc.).
setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));

if (!MS.TrackOrigins)
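
The reworded comment describes the existing AVX masked-load handling: those intrinsics have no integer variant, so the shadow load reuses the floating-point variant and the result is bitcast to the integer shadow type. A rough, hypothetical sketch of that pattern (value names such as %p_shadow are illustrative, not taken from this PR):

  %s      = call <4 x float> @llvm.x86.avx.maskload.ps(ptr %p_shadow, <4 x i32> %mask)  ; shadow loaded via the FP variant
  %shadow = bitcast <4 x float> %s to <4 x i32>                                          ; reinterpret as the integer shadow type
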
@@ -4242,6 +4241,78 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}

// Handle Arm NEON vector load intrinsics (vld*).
//
// The WithLane instructions (ld[234]lane) are similar to:
// call {<4 x i32>, <4 x i32>, <4 x i32>}
// @llvm.aarch64.neon.ld3lane.v4i32.p0
// (<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 %lane, ptr %A)
//
// The non-WithLane instructions (ld[234], ld1x[234], ld[234]r) are similar
// to:
// call {<8 x i8>, <8 x i8>} @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
void handleNEONVectorLoad(IntrinsicInst &I, bool WithLane) {
unsigned int numArgs = I.arg_size();

// Return type is a struct of vectors of integers or floating-point
assert(I.getType()->isStructTy());
[[maybe_unused]] StructType *RetTy = cast<StructType>(I.getType());
[Review comment by @vitalybuka (Collaborator), Mar 21, 2025: "[[maybe_unused]] is not needed because of loop?"]

assert(RetTy->getNumElements() > 0);
assert(RetTy->getElementType(0)->isIntOrIntVectorTy() ||
RetTy->getElementType(0)->isFPOrFPVectorTy());
for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
assert(RetTy->getElementType(i) == RetTy->getElementType(0));

if (WithLane) {
// 2, 3 or 4 vectors, plus lane number, plus input pointer
assert(4 <= numArgs && numArgs <= 6);

// Return type is a struct of the input vectors
assert(RetTy->getNumElements() + 2 == numArgs);
for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
assert(I.getArgOperand(i)->getType() == RetTy->getElementType(0));
} else {
assert(numArgs == 1);
}

IRBuilder<> IRB(&I);

SmallVector<Value *, 6> ShadowArgs;
if (WithLane) {
for (unsigned int i = 0; i < numArgs - 2; i++)
ShadowArgs.push_back(getShadow(I.getArgOperand(i)));

// Lane number, passed verbatim
Value *LaneNumber = I.getArgOperand(numArgs - 2);
ShadowArgs.push_back(LaneNumber);

// TODO: blend shadow of lane number into output shadow?
insertShadowCheck(LaneNumber, &I);
}

Value *Src = I.getArgOperand(numArgs - 1);
assert(Src->getType()->isPointerTy() && "Source is not a pointer!");

Type *SrcShadowTy = getShadowTy(Src);
auto [SrcShadowPtr, SrcOriginPtr] =
getShadowOriginPtr(Src, IRB, SrcShadowTy, Align(1), /*isStore*/ false);
ShadowArgs.push_back(SrcShadowPtr);

// The NEON vector load instructions handled by this function all have
// integer variants. It is easier to use those rather than trying to cast
// a struct of vectors of floats into a struct of vectors of integers.
CallInst *CI =
IRB.CreateIntrinsic(getShadowTy(&I), I.getIntrinsicID(), ShadowArgs);
setShadow(&I, CI);

if (!MS.TrackOrigins)
return;

Value *PtrSrcOrigin = IRB.CreateLoad(MS.OriginTy, SrcOriginPtr);
setOrigin(&I, PtrSrcOrigin);
}
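
To make the shadow propagation concrete, a rough sketch of the IR this handler is expected to emit follows; it is not copied from the PR's tests, and value names such as %A_shadow, %A_origin, %sa, %sb and %sc are illustrative only. For a plain two-register load, the shadow of the returned struct is obtained by issuing the same ld2 intrinsic on the shadow address of the source pointer, and the origin is a plain i32 load from the corresponding origin address:

  %shadow = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A_shadow)
  %origin = load i32, ptr %A_origin            ; only when MS.TrackOrigins is set
  %res    = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)

For the WithLane variants, the shadow call additionally receives the shadows of the input vectors and the lane index verbatim, and insertShadowCheck reports the lane index if it is itself uninitialized:

  %shadow = call { <4 x i32>, <4 x i32>, <4 x i32> }
            @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %sa, <4 x i32> %sb,
                                                <4 x i32> %sc, i64 %lane, ptr %A_shadow)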

/// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
/// and vst{2,3,4}lane).
///
@@ -4946,6 +5017,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/true);
break;

case Intrinsic::aarch64_neon_ld1x2:
case Intrinsic::aarch64_neon_ld1x3:
case Intrinsic::aarch64_neon_ld1x4:
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
case Intrinsic::aarch64_neon_ld2r:
case Intrinsic::aarch64_neon_ld3r:
case Intrinsic::aarch64_neon_ld4r: {
handleNEONVectorLoad(I, /*WithLane=*/false);
break;
}

case Intrinsic::aarch64_neon_ld2lane:
case Intrinsic::aarch64_neon_ld3lane:
case Intrinsic::aarch64_neon_ld4lane: {
handleNEONVectorLoad(I, /*WithLane=*/true);
break;
}

// Saturating extract narrow
case Intrinsic::aarch64_neon_sqxtn:
case Intrinsic::aarch64_neon_sqxtun: