Commit 063db51

Reapply "[msan] Add handlers for AVX masked load/store intrinsics (#123857)"
This reverts commit b9d301c, i.e., it relands db79fb2. I had mistakenly thought that commit caused a buildbot breakage (the actual culprit was my other patch, #123980, which landed at the same time) and had therefore reverted it, even though AFAIK it is not broken.
1 parent 17d1523 commit 063db51

File tree

5 files changed: +489 −289 lines changed


llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 153 additions & 1 deletion
@@ -3046,7 +3046,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     if (maybeHandleSimpleNomemIntrinsic(I))
       return true;
 
-    // FIXME: detect and handle SSE maskstore/maskload
+    // FIXME: detect and handle SSE maskstore/maskload?
+    // Some cases are now handled in handleAVXMasked{Load,Store}.
     return false;
   }
 
@@ -3683,6 +3684,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     // TODO: Store origin.
   }
 
+  // Intrinsic::masked_store
+  //
+  // Note: handleAVXMaskedStore handles AVX/AVX2 variants, though AVX512 masked
+  // stores are lowered to Intrinsic::masked_store.
   void handleMaskedStore(IntrinsicInst &I) {
     IRBuilder<> IRB(&I);
     Value *V = I.getArgOperand(0);
@@ -3713,6 +3718,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                 std::max(Alignment, kMinOriginAlignment));
   }
 
+  // Intrinsic::masked_load
+  //
+  // Note: handleAVXMaskedLoad handles AVX/AVX2 variants, though AVX512 masked
+  // loads are lowered to Intrinsic::masked_load.
   void handleMaskedLoad(IntrinsicInst &I) {
     IRBuilder<> IRB(&I);
     Value *Ptr = I.getArgOperand(0);
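The two notes above distinguish the generic LLVM masked intrinsics (which AVX512 masked operations are lowered to) from the x86-specific AVX/AVX2 ones. The following sketch, which is not part of the patch, illustrates that split at the source level under the assumption that clang lowers these builtins in the usual way; the file name, function names, and build flags are illustrative only.

// masked_store_forms.cpp (hypothetical). Build sketch: clang++ -mavx512vl -c masked_store_forms.cpp
#include <immintrin.h>

void store_avx512vl(float *p, __mmask8 k, __m256 v) {
  // AVX512VL form: boolean-style __mmask8 mask. Typically lowered to the
  // generic @llvm.masked.store.* intrinsic, i.e. instrumented by
  // handleMaskedStore.
  _mm256_mask_storeu_ps(p, k, v);
}

void store_avx(float *p, __m256i mask, __m256 v) {
  // AVX form: per-lane i32 whose MSB selects the lane. Typically lowered to
  // @llvm.x86.avx.maskstore.ps.256, i.e. instrumented by the new
  // handleAVXMaskedStore.
  _mm256_maskstore_ps(p, mask, v);
}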
@@ -3754,6 +3763,125 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOrigin(&I, Origin);
   }
 
+  // e.g., void @llvm.x86.avx.maskstore.ps.256(ptr, <8 x i32>, <8 x float>)
+  //                                           dst  mask       src
+  //
+  // AVX512 masked stores are lowered to Intrinsic::masked_store and are
+  // handled by handleMaskedStore.
+  //
+  // This function handles AVX and AVX2 masked stores; these use the MSBs of a
+  // vector of integers, unlike the LLVM masked intrinsics, which require a
+  // vector of booleans. X86InstCombineIntrinsic.cpp::simplifyX86MaskedLoad
+  // mentions that the x86 backend does not know how to efficiently convert
+  // from a vector of booleans back into the AVX mask format; therefore, they
+  // (and we) do not reduce AVX/AVX2 masked intrinsics into LLVM masked
+  // intrinsics.
+  void handleAVXMaskedStore(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    Value *Dst = I.getArgOperand(0);
+    assert(Dst->getType()->isPointerTy() && "Destination is not a pointer!");
+
+    Value *Mask = I.getArgOperand(1);
+    assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");
+
+    Value *Src = I.getArgOperand(2);
+    assert(isa<VectorType>(Src->getType()) && "Source is not a vector!");
+
+    const Align Alignment = Align(1);
+
+    Value *SrcShadow = getShadow(Src);
+
+    if (ClCheckAccessAddress) {
+      insertShadowCheck(Dst, &I);
+      insertShadowCheck(Mask, &I);
+    }
+
+    Value *DstShadowPtr;
+    Value *DstOriginPtr;
+    std::tie(DstShadowPtr, DstOriginPtr) = getShadowOriginPtr(
+        Dst, IRB, SrcShadow->getType(), Alignment, /*isStore*/ true);
+
+    SmallVector<Value *, 2> ShadowArgs;
+    ShadowArgs.append(1, DstShadowPtr);
+    ShadowArgs.append(1, Mask);
+    // The intrinsic may require floating-point but shadows can be arbitrary
+    // bit patterns, of which some would be interpreted as "invalid"
+    // floating-point values (NaN etc.); we assume the intrinsic will happily
+    // copy them.
+    ShadowArgs.append(1, IRB.CreateBitCast(SrcShadow, Src->getType()));
+
+    CallInst *CI =
+        IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
+    setShadow(&I, CI);
+
+    if (!MS.TrackOrigins)
+      return;
+
+    // Approximation only
+    auto &DL = F.getDataLayout();
+    paintOrigin(IRB, getOrigin(Src), DstOriginPtr,
+                DL.getTypeStoreSize(SrcShadow->getType()),
+                std::max(Alignment, kMinOriginAlignment));
+  }
+
+  // e.g., <8 x float> @llvm.x86.avx.maskload.ps.256(ptr, <8 x i32>)
+  //       return                                    src  mask
+  //
+  // Masked-off values are replaced with 0, which conveniently also represents
+  // initialized memory.
+  //
+  // AVX512 masked loads are lowered to Intrinsic::masked_load and are handled
+  // by handleMaskedLoad.
+  //
+  // We do not combine this with handleMaskedLoad; see comment in
+  // handleAVXMaskedStore for the rationale.
+  //
+  // This is subtly different from handleIntrinsicByApplyingToShadow(I, 1)
+  // because we need to apply getShadowOriginPtr, not getShadow, to the first
+  // parameter.
+  void handleAVXMaskedLoad(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    Value *Src = I.getArgOperand(0);
+    assert(Src->getType()->isPointerTy() && "Source is not a pointer!");
+
+    Value *Mask = I.getArgOperand(1);
+    assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");
+
+    const Align Alignment = Align(1);
+
+    if (ClCheckAccessAddress) {
+      insertShadowCheck(Mask, &I);
+    }
+
+    Type *SrcShadowTy = getShadowTy(Src);
+    Value *SrcShadowPtr, *SrcOriginPtr;
+    std::tie(SrcShadowPtr, SrcOriginPtr) =
+        getShadowOriginPtr(Src, IRB, SrcShadowTy, Alignment, /*isStore*/ false);
+
+    SmallVector<Value *, 2> ShadowArgs;
+    ShadowArgs.append(1, SrcShadowPtr);
+    ShadowArgs.append(1, Mask);
+
+    CallInst *CI =
+        IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
+    // The intrinsic may require floating-point but shadows can be arbitrary
+    // bit patterns, of which some would be interpreted as "invalid"
+    // floating-point values (NaN etc.); we assume the intrinsic will happily
+    // copy them.
+    setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
+
+    if (!MS.TrackOrigins)
+      return;
+
+    // The "pass-through" value is always zero (initialized). To the extent
+    // that this results in initialized aligned 4-byte chunks, the origin
+    // value is ignored. It is therefore correct to simply copy the origin
+    // from src.
+    Value *PtrSrcOrigin = IRB.CreateLoad(MS.OriginTy, SrcOriginPtr);
+    setOrigin(&I, PtrSrcOrigin);
+  }
+
   // Instrument BMI / BMI2 intrinsics.
   // All of these intrinsics are Z = I(X, Y)
   // where the types of all operands and the result match, and are either i32 or
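For readers unfamiliar with the AVX mask convention the comments in handleAVXMaskedStore and handleAVXMaskedLoad describe, here is a small sketch outside the patch, using the corresponding compiler intrinsics: each lane is enabled by the MSB of the matching 32-bit mask element, and masked-off lanes of a maskload read back as zero, which is why a zero ("initialized") shadow is the right pass-through shadow. The file name and build flags are assumptions, not part of the commit.

// avx_mask_semantics.cpp (hypothetical), plain AVX, no sanitizer.
// Build sketch: clang++ -mavx avx_mask_semantics.cpp
#include <immintrin.h>
#include <cstdio>

int main() {
  alignas(32) float src[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  alignas(32) float dst[8] = {-1, -1, -1, -1, -1, -1, -1, -1};

  // A lane is enabled iff the MSB of the corresponding i32 element is set;
  // here only the even lanes are enabled.
  __m256i mask = _mm256_setr_epi32(-1, 0, -1, 0, -1, 0, -1, 0);

  // Disabled lanes of a maskload read back as 0.0f.
  __m256 v = _mm256_maskload_ps(src, mask);

  // Disabled lanes of a maskstore leave the destination untouched.
  _mm256_maskstore_ps(dst, mask, v);

  for (float f : dst)
    std::printf("%g ", f); // prints: 1 -1 3 -1 5 -1 7 -1
  std::printf("\n");
}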
@@ -4466,6 +4594,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
     }
 
+    case Intrinsic::x86_avx_maskstore_ps:
+    case Intrinsic::x86_avx_maskstore_pd:
+    case Intrinsic::x86_avx_maskstore_ps_256:
+    case Intrinsic::x86_avx_maskstore_pd_256:
+    case Intrinsic::x86_avx2_maskstore_d:
+    case Intrinsic::x86_avx2_maskstore_q:
+    case Intrinsic::x86_avx2_maskstore_d_256:
+    case Intrinsic::x86_avx2_maskstore_q_256: {
+      handleAVXMaskedStore(I);
+      break;
+    }
+
+    case Intrinsic::x86_avx_maskload_ps:
+    case Intrinsic::x86_avx_maskload_pd:
+    case Intrinsic::x86_avx_maskload_ps_256:
+    case Intrinsic::x86_avx_maskload_pd_256:
+    case Intrinsic::x86_avx2_maskload_d:
+    case Intrinsic::x86_avx2_maskload_q:
+    case Intrinsic::x86_avx2_maskload_d_256:
+    case Intrinsic::x86_avx2_maskload_q_256: {
+      handleAVXMaskedLoad(I);
+      break;
+    }
+
     case Intrinsic::fshl:
     case Intrinsic::fshr:
       handleFunnelShift(I);
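With these switch cases in place, the user-visible effect can be sketched as follows. This is a hedged example, not taken from the patch or its tests; the file name, build flags, and exact report site are assumptions. Because the new handler propagates shadow lane-by-lane through the AVX masked load, only lanes that actually read uninitialized memory should be reported when used, while masked-off lanes (pass-through zero, zero shadow) should stay quiet.

// msan_avx_maskload_example.cpp (hypothetical).
// Build sketch: clang++ -fsanitize=memory -mavx -O0 msan_avx_maskload_example.cpp
#include <immintrin.h>
#include <cstdio>

int main() {
  float *buf = new float[8]; // heap memory is uninitialized under MSan
  buf[0] = 1.0f;             // initialize lane 0 only

  // Lanes 0-2 enabled (MSB set), lanes 3-7 masked off.
  __m256i mask = _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0);
  __m256 v = _mm256_maskload_ps(buf, mask); // llvm.x86.avx.maskload.ps.256 at -O0

  float out[8];
  _mm256_storeu_ps(out, v);

  if (out[0] > 0.0f)         // lane 0 was initialized: no report expected
    std::printf("lane 0 ok\n");
  if (out[3] > 0.0f)         // lane 3 was masked off, pass-through 0: no report expected
    std::printf("lane 3 positive\n");
  if (out[1] > 0.0f)         // lane 1 read uninitialized buf[1]:
    std::printf("lane 1 positive\n"); // MSan should report the branch above

  delete[] buf;
}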
