[msan] Support vst1x_{2,3,4} and vst_{2,3,4} with floating-point parameters #100644

Merged: 5 commits, Jul 30, 2024
39 changes: 29 additions & 10 deletions llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3873,11 +3873,18 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
-  /// Handle Arm NEON vector store intrinsics (vst{2,3,4}).
+  /// Handle Arm NEON vector store intrinsics (vst{2,3,4} and vst1x_{2,3,4}).
   ///
   /// Arm NEON vector store intrinsics have the output address (pointer) as the
   /// last argument, with the initial arguments being the inputs. They return
   /// void.
+  ///
+  /// - st4 interleaves the output e.g., st4 (inA, inB, inC, inD, outP) writes
+  ///   abcdabcdabcdabcd... into *outP
+  /// - st1_x4 is non-interleaved e.g., st1_x4 (inA, inB, inC, inD, outP)
+  ///   writes aaaa...bbbb...cccc...dddd... into *outP
+  /// These instructions can all be instrumented with essentially the same
+  /// MSan logic, simply by applying the corresponding intrinsic to the shadow.
   void handleNEONVectorStoreIntrinsic(IntrinsicInst &I) {
     IRBuilder<> IRB(&I);
 
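To make the interleaving distinction in the new doc comment concrete, here is a small illustration (not part of the patch) using the ACLE intrinsics from <arm_neon.h>, which Clang normally lowers to the aarch64.neon.st4 and aarch64.neon.st1x4 intrinsics handled above:

#include <arm_neon.h>

// st4: element-wise interleaving, writes a0 b0 c0 d0 a1 b1 c1 d1 ...
void store_interleaved(float *out, float32x4x4_t v) { vst4q_f32(out, v); }

// st1_x4: the four vectors back to back, writes a0..a3 b0..b3 c0..c3 d0..d3
void store_contiguous(float *out, float32x4x4_t v) { vst1q_f32_x4(out, v); }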
@@ -3892,11 +3899,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     if (ClCheckAccessAddress)
       insertShadowCheck(Addr, &I);
 
+    SmallVector<Value *, 8> Shadows;
     // Every arg operand, other than the last one, is an input vector
-    IntrinsicInst *ShadowI = cast<IntrinsicInst>(I.clone());
     for (int i = 0; i < numArgOperands - 1; i++) {
       assert(isa<FixedVectorType>(I.getArgOperand(i)->getType()));
-      ShadowI->setArgOperand(i, getShadow(&I, i));
+      Value *Shadow = getShadow(&I, i);
+      Shadows.append(1, Shadow);
     }
 
     // MSan's GetShadowTy assumes the LHS is the type we want the shadow for
@@ -3914,13 +3922,20 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getElementType(),
         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements() *
             (numArgOperands - 1));
-    Type *ShadowTy = getShadowTy(OutputVectorTy);
-    Value *ShadowPtr, *OriginPtr;
+    Type *OutputShadowTy = getShadowTy(OutputVectorTy);
+
+    Value *OutputShadowPtr, *OutputOriginPtr;
     // AArch64 NEON does not need alignment (unless OS requires it)
-    std::tie(ShadowPtr, OriginPtr) =
-        getShadowOriginPtr(Addr, IRB, ShadowTy, Align(1), /*isStore*/ true);
-    ShadowI->setArgOperand(numArgOperands - 1, ShadowPtr);
-    ShadowI->insertAfter(&I);
+    std::tie(OutputShadowPtr, OutputOriginPtr) = getShadowOriginPtr(
+        Addr, IRB, OutputShadowTy, Align(1), /*isStore*/ true);
+    Shadows.append(1, OutputShadowPtr);
+
+    // CreateIntrinsic will select the correct (integer) type for the
+    // intrinsic; the original instruction I may have either integer- or
+    // float-type inputs.
+    CallInst *CI =
+        IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), Shadows);
+    setShadow(&I, CI);
 
     if (MS.TrackOrigins) {
       // TODO: if we modelled the vst* instruction more precisely, we could
@@ -3932,7 +3947,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
         OC.Add(I.getArgOperand(i));
 
       const DataLayout &DL = F.getDataLayout();
-      OC.DoneAndStoreOrigin(DL.getTypeStoreSize(OutputVectorTy), OriginPtr);
+      OC.DoneAndStoreOrigin(DL.getTypeStoreSize(OutputVectorTy),
+                            OutputOriginPtr);
     }
   }
 
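The CreateIntrinsic comment above is the heart of the floating-point support: the aarch64.neon.st* intrinsics are overloaded on the vector element type, so building the call from the integer shadow operands selects the integer variant regardless of whether the original store operated on float vectors. A minimal sketch of that pattern (illustrative only; emitShadowSt4 and its parameters are not MSan code, and it assumes the RetTy-based CreateIntrinsic overload from recent IRBuilder):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAArch64.h"

using namespace llvm;

// ShadowVecs holds the four <4 x i32> shadow vectors of a v4f32 st4;
// ShadowPtr points at the shadow of the destination memory.
void emitShadowSt4(IRBuilder<> &IRB, ArrayRef<Value *> ShadowVecs,
                   Value *ShadowPtr) {
  SmallVector<Value *, 8> Args(ShadowVecs.begin(), ShadowVecs.end());
  Args.push_back(ShadowPtr);
  // Overload types are deduced from the void return type and the argument
  // types, so the integer-element variant of st4 is emitted here.
  IRB.CreateIntrinsic(IRB.getVoidTy(), Intrinsic::aarch64_neon_st4, Args);
}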
@@ -4277,6 +4293,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       setOrigin(&I, getCleanOrigin());
       break;
 
+    case Intrinsic::aarch64_neon_st1x2:
+    case Intrinsic::aarch64_neon_st1x3:
+    case Intrinsic::aarch64_neon_st1x4:
     case Intrinsic::aarch64_neon_st2:
     case Intrinsic::aarch64_neon_st3:
     case Intrinsic::aarch64_neon_st4: {
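The new case labels route the st1x{2,3,4} intrinsics through handleNEONVectorStoreIntrinsic. A rough end-to-end sketch of what this enables in user code follows; the file name, build line, and exact diagnostics are assumptions, not taken from the patch or its tests:

// neon_store.cpp: shadow flows through the vst1q_f32_x2 store, so the branch
// on y[5] should be reported by MSan as a use of an uninitialized value.
// Assumed build: clang++ -fsanitize=memory --target=aarch64-linux-gnu -O0 neon_store.cpp
#include <arm_neon.h>
#include <cstdio>

int main() {
  float x[8];
  for (int i = 0; i < 4; i++)
    x[i] = 1.0f;                    // x[4..7] intentionally left uninitialized

  float32x4x2_t v = vld1q_f32_x2(x);
  float y[8];
  vst1q_f32_x2(y, v);               // lowers to llvm.aarch64.neon.st1x2.*

  if (y[5] > 0.0f)                  // conditional use of an uninitialized value
    std::printf("positive\n");
  return 0;
}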