@@ -3046,7 +3046,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     if (maybeHandleSimpleNomemIntrinsic(I))
       return true;

-    // FIXME: detect and handle SSE maskstore/maskload
+    // FIXME: detect and handle SSE maskstore/maskload?
+    // Some cases are now handled in handleAVXMasked{Load,Store}.
     return false;
   }

@@ -3683,6 +3684,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     // TODO: Store origin.
   }

+  // Intrinsic::masked_store
+  //
+  // Note: handleAVXMaskedStore handles AVX/AVX2 variants, though AVX512 masked
+  // stores are lowered to Intrinsic::masked_store.
   void handleMaskedStore(IntrinsicInst &I) {
     IRBuilder<> IRB(&I);
     Value *V = I.getArgOperand(0);
@@ -3713,6 +3718,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                   std::max(Alignment, kMinOriginAlignment));
   }

+  // Intrinsic::masked_load
+  //
+  // Note: handleAVXMaskedLoad handles AVX/AVX2 variants, though AVX512 masked
+  // loads are lowered to Intrinsic::masked_load.
   void handleMaskedLoad(IntrinsicInst &I) {
     IRBuilder<> IRB(&I);
     Value *Ptr = I.getArgOperand(0);
@@ -3754,6 +3763,125 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOrigin(&I, Origin);
   }

+  // e.g., void @llvm.x86.avx.maskstore.ps.256(ptr, <8 x i32>, <8 x float>)
+  //                                           dst  mask       src
+  //
+  // AVX512 masked stores are lowered to Intrinsic::masked_store and are
+  // handled by handleMaskedStore.
+  //
+  // This function handles AVX and AVX2 masked stores; these use the MSBs of a
+  // vector of integers, unlike the LLVM masked intrinsics, which require a
+  // vector of booleans. X86InstCombineIntrinsic.cpp::simplifyX86MaskedLoad
+  // mentions that the x86 backend does not know how to efficiently convert
+  // from a vector of booleans back into the AVX mask format; therefore, they
+  // (and we) do not reduce AVX/AVX2 masked intrinsics into LLVM masked
+  // intrinsics.
+  void handleAVXMaskedStore(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    Value *Dst = I.getArgOperand(0);
+    assert(Dst->getType()->isPointerTy() && "Destination is not a pointer!");
+
+    Value *Mask = I.getArgOperand(1);
+    assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");
+
+    Value *Src = I.getArgOperand(2);
+    assert(isa<VectorType>(Src->getType()) && "Source is not a vector!");
+
+    const Align Alignment = Align(1);
+
+    Value *SrcShadow = getShadow(Src);
+
+    if (ClCheckAccessAddress) {
+      insertShadowCheck(Dst, &I);
+      insertShadowCheck(Mask, &I);
+    }
+
+    Value *DstShadowPtr;
+    Value *DstOriginPtr;
+    std::tie(DstShadowPtr, DstOriginPtr) = getShadowOriginPtr(
+        Dst, IRB, SrcShadow->getType(), Alignment, /*isStore*/ true);
+
+    SmallVector<Value *, 2> ShadowArgs;
+    ShadowArgs.append(1, DstShadowPtr);
+    ShadowArgs.append(1, Mask);
+    // The intrinsic may require floating-point but shadows can be arbitrary
+    // bit patterns, of which some would be interpreted as "invalid"
+    // floating-point values (NaN etc.); we assume the intrinsic will happily
+    // copy them.
+    ShadowArgs.append(1, IRB.CreateBitCast(SrcShadow, Src->getType()));
+
+    CallInst *CI =
+        IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
+    setShadow(&I, CI);
+
+    if (!MS.TrackOrigins)
+      return;
+
+    // Approximation only
+    auto &DL = F.getDataLayout();
+    paintOrigin(IRB, getOrigin(Src), DstOriginPtr,
+                DL.getTypeStoreSize(SrcShadow->getType()),
+                std::max(Alignment, kMinOriginAlignment));
+  }
+
+  // e.g., <8 x float> @llvm.x86.avx.maskload.ps.256(ptr, <8 x i32>)
+  //       return                                    src  mask
+  //
+  // Masked-off values are replaced with 0, which conveniently also represents
+  // initialized memory.
+  //
+  // AVX512 masked loads are lowered to Intrinsic::masked_load and are handled
+  // by handleMaskedLoad.
+  //
+  // We do not combine this with handleMaskedLoad; see comment in
+  // handleAVXMaskedStore for the rationale.
+  //
+  // This is subtly different than handleIntrinsicByApplyingToShadow(I, 1)
+  // because we need to apply getShadowOriginPtr, not getShadow, to the first
+  // parameter.
+  void handleAVXMaskedLoad(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    Value *Src = I.getArgOperand(0);
+    assert(Src->getType()->isPointerTy() && "Source is not a pointer!");
+
+    Value *Mask = I.getArgOperand(1);
+    assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");
+
+    const Align Alignment = Align(1);
+
+    if (ClCheckAccessAddress) {
+      insertShadowCheck(Mask, &I);
+    }
+
+    Type *SrcShadowTy = getShadowTy(Src);
+    Value *SrcShadowPtr, *SrcOriginPtr;
+    std::tie(SrcShadowPtr, SrcOriginPtr) =
+        getShadowOriginPtr(Src, IRB, SrcShadowTy, Alignment, /*isStore*/ false);
+
+    SmallVector<Value *, 2> ShadowArgs;
+    ShadowArgs.append(1, SrcShadowPtr);
+    ShadowArgs.append(1, Mask);
+
+    CallInst *CI =
+        IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
+    // The intrinsic may require floating-point but shadows can be arbitrary
+    // bit patterns, of which some would be interpreted as "invalid"
+    // floating-point values (NaN etc.); we assume the intrinsic will happily
+    // copy them.
+    setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
+
+    if (!MS.TrackOrigins)
+      return;
+
+    // The "pass-through" value is always zero (initialized). To the extent
+    // that that results in initialized aligned 4-byte chunks, the origin value
+    // is ignored. It is therefore correct to simply copy the origin from src.
+    Value *PtrSrcOrigin = IRB.CreateLoad(MS.OriginTy, SrcOriginPtr);
+    setOrigin(&I, PtrSrcOrigin);
+  }
+
   // Instrument BMI / BMI2 intrinsics.
   // All of these intrinsics are Z = I(X, Y)
   // where the types of all operands and the result match, and are either i32 or
@@ -4466,6 +4594,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
     }

+    case Intrinsic::x86_avx_maskstore_ps:
+    case Intrinsic::x86_avx_maskstore_pd:
+    case Intrinsic::x86_avx_maskstore_ps_256:
+    case Intrinsic::x86_avx_maskstore_pd_256:
+    case Intrinsic::x86_avx2_maskstore_d:
+    case Intrinsic::x86_avx2_maskstore_q:
+    case Intrinsic::x86_avx2_maskstore_d_256:
+    case Intrinsic::x86_avx2_maskstore_q_256: {
+      handleAVXMaskedStore(I);
+      break;
+    }
+
+    case Intrinsic::x86_avx_maskload_ps:
+    case Intrinsic::x86_avx_maskload_pd:
+    case Intrinsic::x86_avx_maskload_ps_256:
+    case Intrinsic::x86_avx_maskload_pd_256:
+    case Intrinsic::x86_avx2_maskload_d:
+    case Intrinsic::x86_avx2_maskload_q:
+    case Intrinsic::x86_avx2_maskload_d_256:
+    case Intrinsic::x86_avx2_maskload_q_256: {
+      handleAVXMaskedLoad(I);
+      break;
+    }
+
     case Intrinsic::fshl:
     case Intrinsic::fshr:
       handleFunnelShift(I);
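
For readers unfamiliar with the AVX mask convention the new comments describe, here is a minimal standalone C++ sketch. It is not part of the patch and assumes an AVX-capable host compiled with -mavx; it only demonstrates that _mm256_maskstore_ps/_mm256_maskload_ps (which lower to the llvm.x86.avx.maskstore/maskload intrinsics handled above) select lanes by the most significant bit of each 32-bit mask element, and that masked-off lanes of a maskload read back as zero, which is why the pass-through value in handleAVXMaskedLoad can be treated as initialized:

// Illustration only: AVX/AVX2 masked load/store select lanes by mask MSBs.
#include <immintrin.h>
#include <cstdio>

int main() {
  alignas(32) float dst[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
  __m256 src = _mm256_setr_ps(0, 1, 2, 3, 4, 5, 6, 7);

  // Unlike llvm.masked.store/load, which take a vector of i1, the AVX
  // intrinsics take a vector of i32 and look only at each element's sign bit.
  __m256i mask = _mm256_setr_epi32(-1, 0, -1, 0, -1, 0, -1, 0);

  _mm256_maskstore_ps(dst, mask, src);         // writes lanes 0, 2, 4, 6 only
  __m256 back = _mm256_maskload_ps(dst, mask); // masked-off lanes read as 0.0f

  alignas(32) float out[8];
  _mm256_store_ps(out, back);
  for (int i = 0; i < 8; i++)
    std::printf("dst[%d]=%g  back[%d]=%g\n", i, dst[i], i, out[i]);
  return 0;
}

The inability to cheaply convert such an MSB-based mask back into a vector of booleans is exactly why the patch instruments these intrinsics directly instead of rewriting them to Intrinsic::masked_store/Intrinsic::masked_load.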