@@ -3754,6 +3754,119 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOrigin(&I, Origin);
   }
 
+  // e.g., void @llvm.x86.avx.maskstore.ps.256(ptr, <8 x i32>, <8 x float>)
+  //                                           dst  mask       src
+  //
+  // Note: it is difficult to combine this function with handleMaskedStore. The
+  // key challenge is that the LLVM masked intrinsics require a vector of
+  // booleans, while AVX masked intrinsics use the MSBs of a vector of
+  // integers. X86InstCombineIntrinsic.cpp::simplifyX86MaskedLoad mentions that
+  // the x86 backend does not know how to efficiently convert from a vector of
+  // booleans back into the AVX mask format; therefore, they (and we) do not
+  // reduce AVX masked intrinsics into LLVM masked intrinsics.
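+  //
+  // For contrast, the generic LLVM masked store takes an <N x i1> mask, e.g.,
+  //     void @llvm.masked.store.v8f32.p0(<8 x float>, ptr, i32, <8 x i1>)
+  // whereas the AVX intrinsic above predicates each lane on the MSB (sign
+  // bit) of the corresponding i32 mask element.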
+  void handleAVXMaskedStore(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    Value *Dst = I.getArgOperand(0);
+    assert(Dst->getType()->isPointerTy() && "Destination is not a pointer!");
+
+    Value *Mask = I.getArgOperand(1);
+    assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");
+
+    Value *Src = I.getArgOperand(2);
+    assert(isa<VectorType>(Src->getType()) && "Source is not a vector!");
+
+    const Align Alignment = Align(1);
+
+    Value *SrcShadow = getShadow(Src);
+
+    if (ClCheckAccessAddress) {
+      insertShadowCheck(Dst, &I);
+      insertShadowCheck(Mask, &I);
+    }
+
+    Value *DstShadowPtr;
+    Value *DstOriginPtr;
+    std::tie(DstShadowPtr, DstOriginPtr) = getShadowOriginPtr(
+        Dst, IRB, SrcShadow->getType(), Alignment, /*isStore*/ true);
+
+    SmallVector<Value *, 2> ShadowArgs;
+    ShadowArgs.append(1, DstShadowPtr);
+    ShadowArgs.append(1, Mask);
+    // The intrinsic may require floating-point but shadows can be arbitrary
+    // bit patterns, of which some would be interpreted as "invalid"
+    // floating-point values (NaN etc.); we assume the intrinsic will happily
+    // copy them.
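+    // (For example, the <8 x i32> shadow of an <8 x float> source is bitcast
+    // to <8 x float> here so that the intrinsic's signature is satisfied.)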
+    ShadowArgs.append(1, IRB.CreateBitCast(SrcShadow, Src->getType()));
+
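+    // Re-issue the same masked-store intrinsic on shadow memory: the mask is
+    // unchanged, so exactly the lanes written by the original store have
+    // their shadow updated.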
+    CallInst *CI =
+        IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
+    setShadow(&I, CI);
+
+    if (!MS.TrackOrigins)
+      return;
+
+    // Approximation only
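+    // (The approximation: paintOrigin marks the origin for the full store
+    // width, including masked-off lanes, not just the lanes the mask
+    // selects.)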
+    auto &DL = F.getDataLayout();
+    paintOrigin(IRB, getOrigin(Src), DstOriginPtr,
+                DL.getTypeStoreSize(SrcShadow->getType()),
+                std::max(Alignment, kMinOriginAlignment));
+  }
+
+  // e.g., <8 x float> @llvm.x86.avx.maskload.ps.256(ptr, <8 x i32>)
+  //       return                                    src  mask
+  //
+  // Masked-off values are replaced with 0, which conveniently also represents
+  // initialized memory.
+  //
+  // We do not combine this with handleMaskedLoad; see the comment in
+  // handleAVXMaskedStore for the rationale.
+  //
+  // This is subtly different from handleIntrinsicByApplyingToShadow(I, 1)
+  // because we need to apply getShadowOriginPtr, not getShadow, to the first
+  // parameter.
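+  //
+  // Because the shadow load below uses the same mask, each masked-off lane
+  // receives a zero shadow, matching the zero (fully initialized) value the
+  // application sees in that lane.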
+  void handleAVXMaskedLoad(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    Value *Src = I.getArgOperand(0);
+    assert(Src->getType()->isPointerTy() && "Source is not a pointer!");
+
+    Value *Mask = I.getArgOperand(1);
+    assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");
+
+    const Align Alignment = Align(1);
+
+    if (ClCheckAccessAddress) {
+      insertShadowCheck(Mask, &I);
+    }
+
+    Type *SrcShadowTy = getShadowTy(Src);
+    Value *SrcShadowPtr, *SrcOriginPtr;
+    std::tie(SrcShadowPtr, SrcOriginPtr) =
+        getShadowOriginPtr(Src, IRB, SrcShadowTy, Alignment, /*isStore*/ false);
+
+    SmallVector<Value *, 2> ShadowArgs;
+    ShadowArgs.append(1, SrcShadowPtr);
+    ShadowArgs.append(1, Mask);
+
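+    // Load the shadow with the same masked-load intrinsic: lanes the original
+    // load reads get their shadow from shadow memory, while masked-off lanes
+    // are zeroed (initialized) by the intrinsic itself.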
+    CallInst *CI =
+        IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
+    // The intrinsic may require floating-point but shadows can be arbitrary
+    // bit patterns, of which some would be interpreted as "invalid"
+    // floating-point values (NaN etc.); we assume the intrinsic will happily
+    // copy them.
+    setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
+
+    if (!MS.TrackOrigins)
+      return;
+
+    // The "pass-through" value is always zero (initialized). To the extent
+    // that this results in initialized aligned 4-byte chunks, the origin
+    // value is ignored. It is therefore correct to simply copy the origin
+    // from src.
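+    // (Origins are tracked at 4-byte granularity, and the origin of a chunk
+    // whose shadow is all zero is never consulted, so copying src's origin
+    // cannot mislabel a masked-off lane.)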
+    Value *PtrSrcOrigin = IRB.CreateLoad(MS.OriginTy, SrcOriginPtr);
+    setOrigin(&I, PtrSrcOrigin);
+  }
+
   // Instrument BMI / BMI2 intrinsics.
   // All of these intrinsics are Z = I(X, Y)
   // where the types of all operands and the result match, and are either i32 or
@@ -4466,6 +4579,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
     }
 
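+    // The AVX masked-store family: f32/f64 element types from AVX plus
+    // i32/i64 element types from AVX2, in 128-bit and 256-bit vector widths.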
+    case Intrinsic::x86_avx_maskstore_ps:
+    case Intrinsic::x86_avx_maskstore_pd:
+    case Intrinsic::x86_avx_maskstore_ps_256:
+    case Intrinsic::x86_avx_maskstore_pd_256:
+    case Intrinsic::x86_avx2_maskstore_d:
+    case Intrinsic::x86_avx2_maskstore_q:
+    case Intrinsic::x86_avx2_maskstore_d_256:
+    case Intrinsic::x86_avx2_maskstore_q_256: {
+      handleAVXMaskedStore(I);
+      break;
+    }
+
+    case Intrinsic::x86_avx_maskload_ps:
+    case Intrinsic::x86_avx_maskload_pd:
+    case Intrinsic::x86_avx_maskload_ps_256:
+    case Intrinsic::x86_avx_maskload_pd_256:
+    case Intrinsic::x86_avx2_maskload_d:
+    case Intrinsic::x86_avx2_maskload_q:
+    case Intrinsic::x86_avx2_maskload_d_256:
+    case Intrinsic::x86_avx2_maskload_q_256: {
+      handleAVXMaskedLoad(I);
+      break;
+    }
+
     case Intrinsic::fshl:
     case Intrinsic::fshr:
       handleFunnelShift(I);