Skip to content

Commit 4a42607

Browse files
authored
[msan] Use horizontal add to compute shadow for horizontal sub (#124835)
This improves the horizontal sub handling (from #124159) by always using horizontal add for the shadow, as recommended by Vitaly. Fixes #124662.
1 parent 548ecde commit 4a42607

File tree

7 files changed

+89
-27
lines changed

7 files changed

+89
-27
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 73 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4037,21 +4037,83 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
40374037

40384038
void handleAVXHorizontalAddSubIntrinsic(IntrinsicInst &I) {
40394039
// Approximation only:
4040-
// output = horizontal_add(A, B)
4040+
// output = horizontal_add/sub(A, B)
40414041
// => shadow[output] = horizontal_add(shadow[A], shadow[B])
40424042
//
4043-
// - If we add/subtract two adjacent zero (initialized) shadow values, the
4043+
// We always use horizontal add instead of subtract, because subtracting
4044+
// a fully uninitialized shadow from another fully uninitialized shadow would result in a fully initialized (all-zero) shadow.
4045+
//
4046+
// - If we add two adjacent zero (initialized) shadow values, the
40444047
// result will always be zero, i.e., no false positives.
4045-
// - If we add/subtract two shadows, one of which is uninitialized, the
4046-
// result will always be non-zero i.e., no false negative.
4047-
// - However, we can have false negatives if we subtract two non-zero
4048-
// shadows of the same value (or do an addition that wraps to zero); we
4049-
// consider this an acceptable tradeoff for performance.
4048+
// - If we add two shadows, one of which is uninitialized, the
4049+
// result will always be non-zero i.e., no false negatives.
4050+
// - However, we can have false negatives if we do an addition that wraps
4051+
// to zero; we consider this an acceptable tradeoff for performance.
4052+
//
40504053
// To make shadow propagation precise, we want the equivalent of
4051-
// "horizontal OR", but this is not available.
4052-
return handleIntrinsicByApplyingToShadow(
4053-
I, /*shadowIntrinsicID=*/I.getIntrinsicID(),
4054-
/*trailingVerbatimArgs*/ 0);
4054+
// "horizontal OR", but this is not available for SSE3/SSSE3/AVX/AVX2.
4055+
4056+
Intrinsic::ID shadowIntrinsicID = I.getIntrinsicID();
4057+
4058+
switch (I.getIntrinsicID()) {
4059+
case Intrinsic::x86_sse3_hsub_ps:
4060+
shadowIntrinsicID = Intrinsic::x86_sse3_hadd_ps;
4061+
break;
4062+
4063+
case Intrinsic::x86_sse3_hsub_pd:
4064+
shadowIntrinsicID = Intrinsic::x86_sse3_hadd_pd;
4065+
break;
4066+
4067+
case Intrinsic::x86_ssse3_phsub_d:
4068+
shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_d;
4069+
break;
4070+
4071+
case Intrinsic::x86_ssse3_phsub_d_128:
4072+
shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_d_128;
4073+
break;
4074+
4075+
case Intrinsic::x86_ssse3_phsub_w:
4076+
shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_w;
4077+
break;
4078+
4079+
case Intrinsic::x86_ssse3_phsub_w_128:
4080+
shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_w_128;
4081+
break;
4082+
4083+
case Intrinsic::x86_ssse3_phsub_sw:
4084+
shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_sw;
4085+
break;
4086+
4087+
case Intrinsic::x86_ssse3_phsub_sw_128:
4088+
shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_sw_128;
4089+
break;
4090+
4091+
case Intrinsic::x86_avx_hsub_pd_256:
4092+
shadowIntrinsicID = Intrinsic::x86_avx_hadd_pd_256;
4093+
break;
4094+
4095+
case Intrinsic::x86_avx_hsub_ps_256:
4096+
shadowIntrinsicID = Intrinsic::x86_avx_hadd_ps_256;
4097+
break;
4098+
4099+
case Intrinsic::x86_avx2_phsub_d:
4100+
shadowIntrinsicID = Intrinsic::x86_avx2_phadd_d;
4101+
break;
4102+
4103+
case Intrinsic::x86_avx2_phsub_w:
4104+
shadowIntrinsicID = Intrinsic::x86_avx2_phadd_w;
4105+
break;
4106+
4107+
case Intrinsic::x86_avx2_phsub_sw:
4108+
shadowIntrinsicID = Intrinsic::x86_avx2_phadd_sw;
4109+
break;
4110+
4111+
default:
4112+
break;
4113+
}
4114+
4115+
return handleIntrinsicByApplyingToShadow(I, shadowIntrinsicID,
4116+
/*trailingVerbatimArgs*/ 0);
40554117
}
40564118

40574119
/// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},

llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,7 @@ define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1
475475
; CHECK-NEXT: call void @llvm.donothing()
476476
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
477477
; CHECK-NEXT: [[A1:%.*]] = bitcast <4 x i64> [[TMP2]] to <4 x double>
478-
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> [[A0]], <4 x double> [[A1]])
478+
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> [[A0]], <4 x double> [[A1]])
479479
; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64>
480480
; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> [[A2:%.*]], <4 x double> [[A3:%.*]])
481481
; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
@@ -494,7 +494,7 @@ define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) #
494494
; CHECK-NEXT: call void @llvm.donothing()
495495
; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float>
496496
; CHECK-NEXT: [[A1:%.*]] = bitcast <8 x i32> [[TMP2]] to <8 x float>
497-
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> [[A0]], <8 x float> [[A1]])
497+
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> [[A0]], <8 x float> [[A1]])
498498
; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32>
499499
; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> [[A2:%.*]], <8 x float> [[A3:%.*]])
500500
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8

llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -617,7 +617,7 @@ define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) #0 {
617617
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
618618
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
619619
; CHECK-NEXT: call void @llvm.donothing()
620-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
620+
; CHECK-NEXT: [[_MSPROP:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
621621
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
622622
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
623623
; CHECK-NEXT: ret <8 x i32> [[RES]]
@@ -633,7 +633,7 @@ define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) #0 {
633633
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
634634
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
635635
; CHECK-NEXT: call void @llvm.donothing()
636-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
636+
; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
637637
; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
638638
; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
639639
; CHECK-NEXT: ret <16 x i16> [[RES]]
@@ -649,7 +649,7 @@ define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
649649
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
650650
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
651651
; CHECK-NEXT: call void @llvm.donothing()
652-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
652+
; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
653653
; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
654654
; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
655655
; CHECK-NEXT: ret <16 x i16> [[RES]]

llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3339,7 +3339,7 @@ define i64 @test6(<1 x i64> %a, <1 x i64> %b) #0 {
33393339
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
33403340
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
33413341
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
3342-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
3342+
; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
33433343
; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
33443344
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
33453345
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
@@ -3379,7 +3379,7 @@ define i64 @test5(<1 x i64> %a, <1 x i64> %b) #0 {
33793379
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
33803380
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64>
33813381
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
3382-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
3382+
; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
33833383
; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
33843384
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
33853385
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32>
@@ -3419,7 +3419,7 @@ define i64 @test4(<1 x i64> %a, <1 x i64> %b) #0 {
34193419
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
34203420
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
34213421
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
3422-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
3422+
; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
34233423
; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
34243424
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
34253425
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>

llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1
491491
; CHECK-NEXT: call void @llvm.donothing()
492492
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
493493
; CHECK-NEXT: [[A1:%.*]] = bitcast <4 x i64> [[TMP2]] to <4 x double>
494-
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> [[A0]], <4 x double> [[A1]])
494+
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> [[A0]], <4 x double> [[A1]])
495495
; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64>
496496
; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> [[A2:%.*]], <4 x double> [[A3:%.*]])
497497
; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
@@ -511,7 +511,7 @@ define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) #
511511
; CHECK-NEXT: call void @llvm.donothing()
512512
; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float>
513513
; CHECK-NEXT: [[A1:%.*]] = bitcast <8 x i32> [[TMP2]] to <8 x float>
514-
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> [[A0]], <8 x float> [[A1]])
514+
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> [[A0]], <8 x float> [[A1]])
515515
; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32>
516516
; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> [[A2:%.*]], <8 x float> [[A3:%.*]])
517517
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8

llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -651,7 +651,7 @@ define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) #0 {
651651
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
652652
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
653653
; CHECK-NEXT: call void @llvm.donothing()
654-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
654+
; CHECK-NEXT: [[_MSPROP:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
655655
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
656656
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
657657
; CHECK-NEXT: ret <8 x i32> [[RES]]
@@ -668,7 +668,7 @@ define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) #0 {
668668
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
669669
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
670670
; CHECK-NEXT: call void @llvm.donothing()
671-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
671+
; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
672672
; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
673673
; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
674674
; CHECK-NEXT: ret <16 x i16> [[RES]]
@@ -685,7 +685,7 @@ define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
685685
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
686686
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
687687
; CHECK-NEXT: call void @llvm.donothing()
688-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
688+
; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
689689
; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
690690
; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
691691
; CHECK-NEXT: ret <16 x i16> [[RES]]

llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3426,7 +3426,7 @@ define i64 @test6(<1 x i64> %a, <1 x i64> %b) #0 {
34263426
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
34273427
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
34283428
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
3429-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
3429+
; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
34303430
; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
34313431
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
34323432
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
@@ -3467,7 +3467,7 @@ define i64 @test5(<1 x i64> %a, <1 x i64> %b) #0 {
34673467
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
34683468
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64>
34693469
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
3470-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
3470+
; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
34713471
; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
34723472
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
34733473
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32>
@@ -3508,7 +3508,7 @@ define i64 @test4(<1 x i64> %a, <1 x i64> %b) #0 {
35083508
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
35093509
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
35103510
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
3511-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
3511+
; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
35123512
; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
35133513
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
35143514
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>

0 commit comments

Comments
 (0)