@@ -2608,38 +2608,79 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
2608
2608
// / e.g., <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16>)
2609
2609
// / <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>)
2610
2610
// /
2611
- // / TODO: adapt this function to handle horizontal add/sub?
2612
- void handlePairwiseShadowOrIntrinsic (IntrinsicInst &I) {
2611
+ // / Optionally, reinterpret the parameters to have elements of a specified
2612
+ // / width (in bits). For example:
2613
+ // / @llvm.x86.ssse3.phadd.w(<1 x i64> [[VAR1]], <1 x i64> [[VAR2]])
2614
+ // / conceptually operates on
2615
+ // / (<4 x i16> [[VAR1]], <4 x i16> [[VAR2]])
2616
+ // / and can be handled with ReinterpretElemWidth == 16.
2617
+ void
2618
+ handlePairwiseShadowOrIntrinsic (IntrinsicInst &I,
2619
+ std::optional<int > ReinterpretElemWidth) {
2613
2620
assert (I.arg_size () == 1 || I.arg_size () == 2 );
2614
2621
2615
2622
assert (I.getType ()->isVectorTy ());
2616
2623
assert (I.getArgOperand (0 )->getType ()->isVectorTy ());
2617
2624
2618
2625
FixedVectorType *ParamType =
2619
2626
cast<FixedVectorType>(I.getArgOperand (0 )->getType ());
2620
- if (I.arg_size () == 2 )
2627
+ if (I.arg_size () == 2 ) {
2628
+ assert (I.getArgOperand (1 )->getType ()->isVectorTy ());
2621
2629
assert (ParamType == cast<FixedVectorType>(I.getArgOperand (1 )->getType ()));
2630
+ }
2631
+
2622
2632
[[maybe_unused]] FixedVectorType *ReturnType =
2623
2633
cast<FixedVectorType>(I.getType ());
2624
2634
assert (ParamType->getNumElements () * I.arg_size () ==
2625
2635
2 * ReturnType->getNumElements ());
2626
2636
2627
2637
IRBuilder<> IRB (&I);
2628
- unsigned Width = ParamType->getNumElements () * I.arg_size ();
2638
+
2639
+ unsigned TotalNumElems = ParamType->getNumElements () * I.arg_size ();
2640
+ FixedVectorType *ReinterpretShadowTy = nullptr ;
2641
+ if (ReinterpretElemWidth.has_value ()) {
2642
+ assert (ParamType->getPrimitiveSizeInBits () %
2643
+ ReinterpretElemWidth.value () ==
2644
+ 0 );
2645
+ ReinterpretShadowTy = FixedVectorType::get (
2646
+ IRB.getIntNTy (ReinterpretElemWidth.value ()),
2647
+ ParamType->getPrimitiveSizeInBits () / ReinterpretElemWidth.value ());
2648
+ TotalNumElems = ReinterpretShadowTy->getNumElements () * I.arg_size ();
2649
+ }
2629
2650
2630
2651
// Horizontal OR of shadow
2631
2652
SmallVector<int , 8 > EvenMask;
2632
2653
SmallVector<int , 8 > OddMask;
2633
- for (unsigned X = 0 ; X < Width ; X += 2 ) {
2654
+ for (unsigned X = 0 ; X + 1 < TotalNumElems ; X += 2 ) {
2634
2655
EvenMask.push_back (X);
2635
2656
OddMask.push_back (X + 1 );
2636
2657
}
2637
2658
2638
2659
Value *FirstArgShadow = getShadow (&I, 0 );
2660
+ if (ReinterpretShadowTy)
2661
+ FirstArgShadow = IRB.CreateBitCast (FirstArgShadow, ReinterpretShadowTy);
2662
+
2663
+ // If we had two parameters each with an odd number of elements, the total
2664
+ // number of elements is even, but we have never seen this in extant
2665
+ // instruction sets, so we enforce that each parameter must have an even
2666
+ // number of elements.
2667
+ assert (
2668
+ (cast<FixedVectorType>(FirstArgShadow->getType ())->getNumElements ()) %
2669
+ 2 ==
2670
+ 0 );
2671
+
2639
2672
Value *EvenShadow;
2640
2673
Value *OddShadow;
2641
2674
if (I.arg_size () == 2 ) {
2642
2675
Value *SecondArgShadow = getShadow (&I, 1 );
2676
+ if (ReinterpretShadowTy)
2677
+ SecondArgShadow =
2678
+ IRB.CreateBitCast (SecondArgShadow, ReinterpretShadowTy);
2679
+ assert ((cast<FixedVectorType>(SecondArgShadow->getType ())
2680
+ ->getNumElements ()) %
2681
+ 2 ==
2682
+ 0 );
2683
+
2643
2684
EvenShadow =
2644
2685
IRB.CreateShuffleVector (FirstArgShadow, SecondArgShadow, EvenMask);
2645
2686
OddShadow =
@@ -2653,6 +2694,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
2653
2694
OrShadow = CreateShadowCast (IRB, OrShadow, getShadowTy (&I));
2654
2695
2655
2696
setShadow (&I, OrShadow);
2697
+
2656
2698
setOriginForNaryOp (I);
2657
2699
}
2658
2700
@@ -4156,87 +4198,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
4156
4198
setOriginForNaryOp (I);
4157
4199
}
4158
4200
4159
- void handleAVXHorizontalAddSubIntrinsic (IntrinsicInst &I) {
4160
- // Approximation only:
4161
- // output = horizontal_add/sub(A, B)
4162
- // => shadow[output] = horizontal_add(shadow[A], shadow[B])
4163
- //
4164
- // We always use horizontal add instead of subtract, because subtracting
4165
- // a fully uninitialized shadow would result in a fully initialized shadow.
4166
- //
4167
- // - If we add two adjacent zero (initialized) shadow values, the
4168
- // result always be zero i.e., no false positives.
4169
- // - If we add two shadows, one of which is uninitialized, the
4170
- // result will always be non-zero i.e., no false negatives.
4171
- // - However, we can have false negatives if we do an addition that wraps
4172
- // to zero; we consider this an acceptable tradeoff for performance.
4173
- //
4174
- // To make shadow propagation precise, we want the equivalent of
4175
- // "horizontal OR", but this is not available for SSE3/SSSE3/AVX/AVX2.
4176
-
4177
- Intrinsic::ID shadowIntrinsicID = I.getIntrinsicID ();
4178
-
4179
- switch (I.getIntrinsicID ()) {
4180
- case Intrinsic::x86_sse3_hsub_ps:
4181
- shadowIntrinsicID = Intrinsic::x86_sse3_hadd_ps;
4182
- break ;
4183
-
4184
- case Intrinsic::x86_sse3_hsub_pd:
4185
- shadowIntrinsicID = Intrinsic::x86_sse3_hadd_pd;
4186
- break ;
4187
-
4188
- case Intrinsic::x86_ssse3_phsub_d:
4189
- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_d;
4190
- break ;
4191
-
4192
- case Intrinsic::x86_ssse3_phsub_d_128:
4193
- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_d_128;
4194
- break ;
4195
-
4196
- case Intrinsic::x86_ssse3_phsub_w:
4197
- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_w;
4198
- break ;
4199
-
4200
- case Intrinsic::x86_ssse3_phsub_w_128:
4201
- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_w_128;
4202
- break ;
4203
-
4204
- case Intrinsic::x86_ssse3_phsub_sw:
4205
- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_sw;
4206
- break ;
4207
-
4208
- case Intrinsic::x86_ssse3_phsub_sw_128:
4209
- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_sw_128;
4210
- break ;
4211
-
4212
- case Intrinsic::x86_avx_hsub_pd_256:
4213
- shadowIntrinsicID = Intrinsic::x86_avx_hadd_pd_256;
4214
- break ;
4215
-
4216
- case Intrinsic::x86_avx_hsub_ps_256:
4217
- shadowIntrinsicID = Intrinsic::x86_avx_hadd_ps_256;
4218
- break ;
4219
-
4220
- case Intrinsic::x86_avx2_phsub_d:
4221
- shadowIntrinsicID = Intrinsic::x86_avx2_phadd_d;
4222
- break ;
4223
-
4224
- case Intrinsic::x86_avx2_phsub_w:
4225
- shadowIntrinsicID = Intrinsic::x86_avx2_phadd_w;
4226
- break ;
4227
-
4228
- case Intrinsic::x86_avx2_phsub_sw:
4229
- shadowIntrinsicID = Intrinsic::x86_avx2_phadd_sw;
4230
- break ;
4231
-
4232
- default :
4233
- break ;
4234
- }
4235
-
4236
- return handleIntrinsicByApplyingToShadow (I, shadowIntrinsicID,
4237
- /* trailingVerbatimArgs*/ 0 );
4238
- }
4239
-
4240
4201
// / Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
4241
4202
// / and vst{2,3,4}lane).
4242
4203
// /
@@ -4783,33 +4744,49 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
4783
4744
handleVtestIntrinsic (I);
4784
4745
break ;
4785
4746
4786
- case Intrinsic::x86_sse3_hadd_ps:
4787
- case Intrinsic::x86_sse3_hadd_pd:
4788
- case Intrinsic::x86_ssse3_phadd_d:
4789
- case Intrinsic::x86_ssse3_phadd_d_128:
4747
+ // Packed Horizontal Add/Subtract
4790
4748
case Intrinsic::x86_ssse3_phadd_w:
4791
4749
case Intrinsic::x86_ssse3_phadd_w_128:
4750
+ case Intrinsic::x86_avx2_phadd_w:
4751
+ case Intrinsic::x86_ssse3_phsub_w:
4752
+ case Intrinsic::x86_ssse3_phsub_w_128:
4753
+ case Intrinsic::x86_avx2_phsub_w: {
4754
+ handlePairwiseShadowOrIntrinsic (I, /* ReinterpretElemWidth=*/ 16 );
4755
+ break ;
4756
+ }
4757
+
4758
+ // Packed Horizontal Add/Subtract
4759
+ case Intrinsic::x86_ssse3_phadd_d:
4760
+ case Intrinsic::x86_ssse3_phadd_d_128:
4761
+ case Intrinsic::x86_avx2_phadd_d:
4762
+ case Intrinsic::x86_ssse3_phsub_d:
4763
+ case Intrinsic::x86_ssse3_phsub_d_128:
4764
+ case Intrinsic::x86_avx2_phsub_d: {
4765
+ handlePairwiseShadowOrIntrinsic (I, /* ReinterpretElemWidth=*/ 32 );
4766
+ break ;
4767
+ }
4768
+
4769
+ // Packed Horizontal Add/Subtract and Saturate
4792
4770
case Intrinsic::x86_ssse3_phadd_sw:
4793
4771
case Intrinsic::x86_ssse3_phadd_sw_128:
4772
+ case Intrinsic::x86_avx2_phadd_sw:
4773
+ case Intrinsic::x86_ssse3_phsub_sw:
4774
+ case Intrinsic::x86_ssse3_phsub_sw_128:
4775
+ case Intrinsic::x86_avx2_phsub_sw: {
4776
+ handlePairwiseShadowOrIntrinsic (I, /* ReinterpretElemWidth=*/ 16 );
4777
+ break ;
4778
+ }
4779
+
4780
+ // Packed Single/Double Precision Floating-Point Horizontal Add/Subtract
4781
+ case Intrinsic::x86_sse3_hadd_ps:
4782
+ case Intrinsic::x86_sse3_hadd_pd:
4794
4783
case Intrinsic::x86_avx_hadd_pd_256:
4795
4784
case Intrinsic::x86_avx_hadd_ps_256:
4796
- case Intrinsic::x86_avx2_phadd_d:
4797
- case Intrinsic::x86_avx2_phadd_w:
4798
- case Intrinsic::x86_avx2_phadd_sw:
4799
4785
case Intrinsic::x86_sse3_hsub_ps:
4800
4786
case Intrinsic::x86_sse3_hsub_pd:
4801
- case Intrinsic::x86_ssse3_phsub_d:
4802
- case Intrinsic::x86_ssse3_phsub_d_128:
4803
- case Intrinsic::x86_ssse3_phsub_w:
4804
- case Intrinsic::x86_ssse3_phsub_w_128:
4805
- case Intrinsic::x86_ssse3_phsub_sw:
4806
- case Intrinsic::x86_ssse3_phsub_sw_128:
4807
4787
case Intrinsic::x86_avx_hsub_pd_256:
4808
- case Intrinsic::x86_avx_hsub_ps_256:
4809
- case Intrinsic::x86_avx2_phsub_d:
4810
- case Intrinsic::x86_avx2_phsub_w:
4811
- case Intrinsic::x86_avx2_phsub_sw: {
4812
- handleAVXHorizontalAddSubIntrinsic (I);
4788
+ case Intrinsic::x86_avx_hsub_ps_256: {
4789
+ handlePairwiseShadowOrIntrinsic (I, /* ReinterpretElemWidth=*/ std::nullopt);
4813
4790
break ;
4814
4791
}
4815
4792
@@ -4869,7 +4846,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
4869
4846
// Add Long Pairwise
4870
4847
case Intrinsic::aarch64_neon_saddlp:
4871
4848
case Intrinsic::aarch64_neon_uaddlp: {
4872
- handlePairwiseShadowOrIntrinsic (I);
4849
+ handlePairwiseShadowOrIntrinsic (I, std::nullopt );
4873
4850
break ;
4874
4851
}
4875
4852
0 commit comments