Commit 071f3f4

[msan] Add handlers for AVX masked load/store intrinsics
This patch adds explicit support for AVX masked load/store intrinsics, largely by applying the intrinsics to the shadows (but in a way that is subtly different from handleIntrinsicByApplyingToShadow()). We do not reuse the handleMaskedLoad/Store functions. The key challenge is that the LLVM masked intrinsics require a vector of booleans, while AVX masked intrinsics use the MSBs of a vector of integers. X86InstCombineIntrinsic.cpp::simplifyX86MaskedLoad mentions that the x86 backend does not know how to efficiently convert from a vector of booleans back into the AVX mask format; therefore, they (and we) do not reduce AVX masked intrinsics into LLVM masked intrinsics.
1 parent 1f26ac1 commit 071f3f4
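
As context for the commit message, here is a minimal sketch (illustrative only, not part of the patch; the helper name is made up) of the mask-format mismatch: an AVX mask is a vector of 32-bit integers whose most significant bits act as per-lane predicates, whereas the LLVM masked load/store intrinsics take a vector of booleans. Deriving the booleans is a simple signed "is negative" test per lane, but the x86 backend has no efficient lowering for the reverse conversion, which is why the patch keeps the AVX form.

#include <array>
#include <cstdint>

// Illustrative model of an <8 x i32> AVX mask as plain integers, deriving the
// <8 x i1>-style boolean mask that @llvm.masked.load/store would expect.
// A lane is "on" exactly when its element has the MSB set, i.e. when the
// element is negative as a signed 32-bit integer.
static std::array<bool, 8> avxMaskToBooleans(const std::array<int32_t, 8> &AVXMask) {
  std::array<bool, 8> Bools{};
  for (int I = 0; I < 8; ++I)
    Bools[I] = AVXMask[I] < 0; // MSB set <=> negative
  return Bools;
}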

File tree: 5 files changed (+473 / -288 lines)

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 137 additions & 0 deletions
@@ -3754,6 +3754,119 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOrigin(&I, Origin);
   }
 
+  // e.g., void @llvm.x86.avx.maskstore.ps.256(ptr, <8 x i32>, <8 x float>)
+  //                                           dst  mask       src
+  //
+  // Note: it is difficult to combine this function with handleMaskedStore. The
+  // key challenge is that the LLVM masked intrinsics require a vector of
+  // booleans, while AVX masked intrinsics use the MSBs of a vector of
+  // integers. X86InstCombineIntrinsic.cpp::simplifyX86MaskedLoad mentions that
+  // the x86 backend does not know how to efficiently convert from a vector of
+  // booleans back into the AVX mask format; therefore, they (and we) do not
+  // reduce AVX masked intrinsics into LLVM masked intrinsics.
+  void handleAVXMaskedStore(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    Value *Dst = I.getArgOperand(0);
+    assert(Dst->getType()->isPointerTy() && "Destination is not a pointer!");
+
+    Value *Mask = I.getArgOperand(1);
+    assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");
+
+    Value *Src = I.getArgOperand(2);
+    assert(isa<VectorType>(Src->getType()) && "Source is not a vector!");
+
+    const Align Alignment = Align(1);
+
+    Value *SrcShadow = getShadow(Src);
+
+    if (ClCheckAccessAddress) {
+      insertShadowCheck(Dst, &I);
+      insertShadowCheck(Mask, &I);
+    }
+
+    Value *DstShadowPtr;
+    Value *DstOriginPtr;
+    std::tie(DstShadowPtr, DstOriginPtr) = getShadowOriginPtr(
+        Dst, IRB, SrcShadow->getType(), Alignment, /*isStore*/ true);
+
+    SmallVector<Value *, 2> ShadowArgs;
+    ShadowArgs.append(1, DstShadowPtr);
+    ShadowArgs.append(1, Mask);
+    // The intrinsic may require floating-point but shadows can be arbitrary
+    // bit patterns, of which some would be interpreted as "invalid"
+    // floating-point values (NaN etc.); we assume the intrinsic will happily
+    // copy them.
+    ShadowArgs.append(1, IRB.CreateBitCast(SrcShadow, Src->getType()));
+
+    CallInst *CI =
+        IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
+    setShadow(&I, CI);
+
+    if (!MS.TrackOrigins)
+      return;
+
+    // Approximation only
+    auto &DL = F.getDataLayout();
+    paintOrigin(IRB, getOrigin(Src), DstOriginPtr,
+                DL.getTypeStoreSize(SrcShadow->getType()),
+                std::max(Alignment, kMinOriginAlignment));
+  }
+
+  // e.g., <8 x float> @llvm.x86.avx.maskload.ps.256(ptr, <8 x i32>)
+  //       return                                    src  mask
+  //
+  // Masked-off values are replaced with 0, which conveniently also represents
+  // initialized memory.
+  //
+  // We do not combine this with handleMaskedLoad; see comment in
+  // handleAVXMaskedStore for the rationale.
+  //
+  // This is subtly different than handleIntrinsicByApplyingToShadow(I, 1)
+  // because we need to apply getShadowOriginPtr, not getShadow, to the first
+  // parameter.
+  void handleAVXMaskedLoad(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    Value *Src = I.getArgOperand(0);
+    assert(Src->getType()->isPointerTy() && "Source is not a pointer!");
+
+    Value *Mask = I.getArgOperand(1);
+    assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");
+
+    const Align Alignment = Align(1);
+
+    if (ClCheckAccessAddress) {
+      insertShadowCheck(Mask, &I);
+    }
+
+    Type *SrcShadowTy = getShadowTy(Src);
+    Value *SrcShadowPtr, *SrcOriginPtr;
+    std::tie(SrcShadowPtr, SrcOriginPtr) =
+        getShadowOriginPtr(Src, IRB, SrcShadowTy, Alignment, /*isStore*/ false);
+
+    SmallVector<Value *, 2> ShadowArgs;
+    ShadowArgs.append(1, SrcShadowPtr);
+    ShadowArgs.append(1, Mask);
+
+    CallInst *CI =
+        IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
+    // The intrinsic may require floating-point but shadows can be arbitrary
+    // bit patterns, of which some would be interpreted as "invalid"
+    // floating-point values (NaN etc.); we assume the intrinsic will happily
+    // copy them.
+    setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
+
+    if (!MS.TrackOrigins)
+      return;
+
+    // The "pass-through" value is always zero (initialized). To the extent
+    // that that results in initialized aligned 4-byte chunks, the origin value
+    // is ignored. It is therefore correct to simply copy the origin from src.
+    Value *PtrSrcOrigin = IRB.CreateLoad(MS.OriginTy, SrcOriginPtr);
+    setOrigin(&I, PtrSrcOrigin);
+  }
+
   // Instrument BMI / BMI2 intrinsics.
   // All of these intrinsics are Z = I(X, Y)
   // where the types of all operands and the result match, and are either i32 or
@@ -4466,6 +4579,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
     }
 
+    case Intrinsic::x86_avx_maskstore_ps:
+    case Intrinsic::x86_avx_maskstore_pd:
+    case Intrinsic::x86_avx_maskstore_ps_256:
+    case Intrinsic::x86_avx_maskstore_pd_256:
+    case Intrinsic::x86_avx2_maskstore_d:
+    case Intrinsic::x86_avx2_maskstore_q:
+    case Intrinsic::x86_avx2_maskstore_d_256:
+    case Intrinsic::x86_avx2_maskstore_q_256: {
+      handleAVXMaskedStore(I);
+      break;
+    }
+
+    case Intrinsic::x86_avx_maskload_ps:
+    case Intrinsic::x86_avx_maskload_pd:
+    case Intrinsic::x86_avx_maskload_ps_256:
+    case Intrinsic::x86_avx_maskload_pd_256:
+    case Intrinsic::x86_avx2_maskload_d:
+    case Intrinsic::x86_avx2_maskload_q:
+    case Intrinsic::x86_avx2_maskload_d_256:
+    case Intrinsic::x86_avx2_maskload_q_256: {
+      handleAVXMaskedLoad(I);
+      break;
+    }
+
     case Intrinsic::fshl:
     case Intrinsic::fshr:
       handleFunnelShift(I);

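For reference, a sketch (not from this commit; function and variable names are invented) of the kind of source code that lowers to one of the intrinsics handled above, here @llvm.x86.avx.maskstore.ps.256 via _mm256_maskstore_ps from immintrin.h:

#include <immintrin.h>

// Illustrative only; requires AVX (e.g. compile with -mavx). Stores the first
// four lanes of an 8 x float vector and leaves the rest of the destination
// untouched: only lanes whose 32-bit mask element has its MSB set are written.
void storeFirstFourLanes(float *Dst, const float *Src) {
  __m256 V = _mm256_loadu_ps(Src);
  __m256i Mask = _mm256_setr_epi32(-1, -1, -1, -1, 0, 0, 0, 0);
  _mm256_maskstore_ps(Dst, Mask, V);
}

Under MemorySanitizer, handleAVXMaskedStore instruments such a call by issuing the same masked-store intrinsic on the shadow of the source vector, writing it through the same mask into the shadow of the destination.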