Skip to content

Commit 4f5020f

Browse files
committed
DAG: Fix vector_shuffle -> splat fold defining undef lanes
For vector_shuffle splats whose mask contains undef lanes, this fold was introducing real values into those lanes by splatting the element across the whole build_vector. Replace the build_vector operands that correspond to undef mask elements with undef instead. This avoids AMDGPU test regressions in a future change. test/CodeGen/X86/urem-seteq-illegal-types.ll looks worse, but I didn't investigate.
1 parent 1297c11 commit 4f5020f

File tree

9 files changed

+76
-111
lines changed

9 files changed

+76
-111
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26373,9 +26373,17 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
2637326373
if (AllSame)
2637426374
return N0;
2637526375

26376-
// Canonicalize any other splat as a build_vector.
26376+
// Canonicalize any other splat as a build_vector, but avoid defining any
26377+
// undefined elements in the mask.
2637726378
SDValue Splatted = V->getOperand(SplatIndex);
2637826379
SmallVector<SDValue, 8> Ops(NumElts, Splatted);
26380+
EVT EltVT = Splatted.getValueType();
26381+
26382+
for (unsigned i = 0; i != NumElts; ++i) {
26383+
if (SVN->getMaskElt(i) < 0)
26384+
Ops[i] = DAG.getUNDEF(EltVT);
26385+
}
26386+
2637926387
SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
2638026388

2638126389
// We may have jumped through bitcasts, so the type of the

llvm/test/CodeGen/PowerPC/vector-reduce-fadd.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3628,31 +3628,31 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
36283628
; PWR9LE-LABEL: v2ppcf128_fast:
36293629
; PWR9LE: # %bb.0: # %entry
36303630
; PWR9LE-NEXT: mflr r0
3631-
; PWR9LE-NEXT: stdu r1, -64(r1)
3632-
; PWR9LE-NEXT: std r0, 80(r1)
3631+
; PWR9LE-NEXT: stdu r1, -48(r1)
3632+
; PWR9LE-NEXT: std r0, 64(r1)
36333633
; PWR9LE-NEXT: bl __gcc_qadd
36343634
; PWR9LE-NEXT: nop
36353635
; PWR9LE-NEXT: stfd f2, 40(r1)
36363636
; PWR9LE-NEXT: stfd f1, 32(r1)
36373637
; PWR9LE-NEXT: lxv vs1, 32(r1)
36383638
; PWR9LE-NEXT: xxswapd vs2, vs1
3639-
; PWR9LE-NEXT: addi r1, r1, 64
3639+
; PWR9LE-NEXT: addi r1, r1, 48
36403640
; PWR9LE-NEXT: ld r0, 16(r1)
36413641
; PWR9LE-NEXT: mtlr r0
36423642
; PWR9LE-NEXT: blr
36433643
;
36443644
; PWR9BE-LABEL: v2ppcf128_fast:
36453645
; PWR9BE: # %bb.0: # %entry
36463646
; PWR9BE-NEXT: mflr r0
3647-
; PWR9BE-NEXT: stdu r1, -144(r1)
3648-
; PWR9BE-NEXT: std r0, 160(r1)
3647+
; PWR9BE-NEXT: stdu r1, -128(r1)
3648+
; PWR9BE-NEXT: std r0, 144(r1)
36493649
; PWR9BE-NEXT: bl __gcc_qadd
36503650
; PWR9BE-NEXT: nop
36513651
; PWR9BE-NEXT: stfd f2, 120(r1)
36523652
; PWR9BE-NEXT: stfd f1, 112(r1)
36533653
; PWR9BE-NEXT: lxv vs1, 112(r1)
36543654
; PWR9BE-NEXT: xxswapd vs2, vs1
3655-
; PWR9BE-NEXT: addi r1, r1, 144
3655+
; PWR9BE-NEXT: addi r1, r1, 128
36563656
; PWR9BE-NEXT: ld r0, 16(r1)
36573657
; PWR9BE-NEXT: mtlr r0
36583658
; PWR9BE-NEXT: blr
@@ -3661,13 +3661,13 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
36613661
; PWR10LE: # %bb.0: # %entry
36623662
; PWR10LE-NEXT: mflr r0
36633663
; PWR10LE-NEXT: std r0, 16(r1)
3664-
; PWR10LE-NEXT: stdu r1, -64(r1)
3664+
; PWR10LE-NEXT: stdu r1, -48(r1)
36653665
; PWR10LE-NEXT: bl __gcc_qadd@notoc
36663666
; PWR10LE-NEXT: stfd f2, 40(r1)
36673667
; PWR10LE-NEXT: stfd f1, 32(r1)
36683668
; PWR10LE-NEXT: lxv vs1, 32(r1)
36693669
; PWR10LE-NEXT: xxswapd vs2, vs1
3670-
; PWR10LE-NEXT: addi r1, r1, 64
3670+
; PWR10LE-NEXT: addi r1, r1, 48
36713671
; PWR10LE-NEXT: ld r0, 16(r1)
36723672
; PWR10LE-NEXT: mtlr r0
36733673
; PWR10LE-NEXT: blr
@@ -3676,14 +3676,14 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
36763676
; PWR10BE: # %bb.0: # %entry
36773677
; PWR10BE-NEXT: mflr r0
36783678
; PWR10BE-NEXT: std r0, 16(r1)
3679-
; PWR10BE-NEXT: stdu r1, -144(r1)
3679+
; PWR10BE-NEXT: stdu r1, -128(r1)
36803680
; PWR10BE-NEXT: bl __gcc_qadd
36813681
; PWR10BE-NEXT: nop
36823682
; PWR10BE-NEXT: stfd f2, 120(r1)
36833683
; PWR10BE-NEXT: stfd f1, 112(r1)
36843684
; PWR10BE-NEXT: lxv vs1, 112(r1)
36853685
; PWR10BE-NEXT: xxswapd vs2, vs1
3686-
; PWR10BE-NEXT: addi r1, r1, 144
3686+
; PWR10BE-NEXT: addi r1, r1, 128
36873687
; PWR10BE-NEXT: ld r0, 16(r1)
36883688
; PWR10BE-NEXT: mtlr r0
36893689
; PWR10BE-NEXT: blr

llvm/test/CodeGen/WebAssembly/simd.ll

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -481,21 +481,6 @@ define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) {
481481
; NO-SIMD128-LABEL: shuffle_undef_v16i8:
482482
; NO-SIMD128: .functype shuffle_undef_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
483483
; NO-SIMD128-NEXT: # %bb.0:
484-
; NO-SIMD128-NEXT: i32.store8 15($0), $2
485-
; NO-SIMD128-NEXT: i32.store8 14($0), $2
486-
; NO-SIMD128-NEXT: i32.store8 13($0), $2
487-
; NO-SIMD128-NEXT: i32.store8 12($0), $2
488-
; NO-SIMD128-NEXT: i32.store8 11($0), $2
489-
; NO-SIMD128-NEXT: i32.store8 10($0), $2
490-
; NO-SIMD128-NEXT: i32.store8 9($0), $2
491-
; NO-SIMD128-NEXT: i32.store8 8($0), $2
492-
; NO-SIMD128-NEXT: i32.store8 7($0), $2
493-
; NO-SIMD128-NEXT: i32.store8 6($0), $2
494-
; NO-SIMD128-NEXT: i32.store8 5($0), $2
495-
; NO-SIMD128-NEXT: i32.store8 4($0), $2
496-
; NO-SIMD128-NEXT: i32.store8 3($0), $2
497-
; NO-SIMD128-NEXT: i32.store8 2($0), $2
498-
; NO-SIMD128-NEXT: i32.store8 1($0), $2
499484
; NO-SIMD128-NEXT: i32.store8 0($0), $2
500485
; NO-SIMD128-NEXT: return
501486
%res = shufflevector <16 x i8> %x, <16 x i8> %y,
@@ -994,13 +979,6 @@ define <8 x i16> @shuffle_undef_v8i16(<8 x i16> %x, <8 x i16> %y) {
994979
; NO-SIMD128-LABEL: shuffle_undef_v8i16:
995980
; NO-SIMD128: .functype shuffle_undef_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
996981
; NO-SIMD128-NEXT: # %bb.0:
997-
; NO-SIMD128-NEXT: i32.store16 14($0), $2
998-
; NO-SIMD128-NEXT: i32.store16 12($0), $2
999-
; NO-SIMD128-NEXT: i32.store16 10($0), $2
1000-
; NO-SIMD128-NEXT: i32.store16 8($0), $2
1001-
; NO-SIMD128-NEXT: i32.store16 6($0), $2
1002-
; NO-SIMD128-NEXT: i32.store16 4($0), $2
1003-
; NO-SIMD128-NEXT: i32.store16 2($0), $2
1004982
; NO-SIMD128-NEXT: i32.store16 0($0), $2
1005983
; NO-SIMD128-NEXT: return
1006984
%res = shufflevector <8 x i16> %x, <8 x i16> %y,
@@ -1288,9 +1266,6 @@ define <4 x i32> @shuffle_undef_v4i32(<4 x i32> %x, <4 x i32> %y) {
12881266
; NO-SIMD128-LABEL: shuffle_undef_v4i32:
12891267
; NO-SIMD128: .functype shuffle_undef_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
12901268
; NO-SIMD128-NEXT: # %bb.0:
1291-
; NO-SIMD128-NEXT: i32.store 12($0), $2
1292-
; NO-SIMD128-NEXT: i32.store 8($0), $2
1293-
; NO-SIMD128-NEXT: i32.store 4($0), $2
12941269
; NO-SIMD128-NEXT: i32.store 0($0), $2
12951270
; NO-SIMD128-NEXT: return
12961271
%res = shufflevector <4 x i32> %x, <4 x i32> %y,
@@ -1550,7 +1525,6 @@ define <2 x i64> @shuffle_undef_v2i64(<2 x i64> %x, <2 x i64> %y) {
15501525
; NO-SIMD128-LABEL: shuffle_undef_v2i64:
15511526
; NO-SIMD128: .functype shuffle_undef_v2i64 (i32, i64, i64, i64, i64) -> ()
15521527
; NO-SIMD128-NEXT: # %bb.0:
1553-
; NO-SIMD128-NEXT: i64.store 8($0), $2
15541528
; NO-SIMD128-NEXT: i64.store 0($0), $2
15551529
; NO-SIMD128-NEXT: return
15561530
%res = shufflevector <2 x i64> %x, <2 x i64> %y,
@@ -1819,9 +1793,6 @@ define <4 x float> @shuffle_undef_v4f32(<4 x float> %x, <4 x float> %y) {
18191793
; NO-SIMD128-LABEL: shuffle_undef_v4f32:
18201794
; NO-SIMD128: .functype shuffle_undef_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
18211795
; NO-SIMD128-NEXT: # %bb.0:
1822-
; NO-SIMD128-NEXT: f32.store 12($0), $2
1823-
; NO-SIMD128-NEXT: f32.store 8($0), $2
1824-
; NO-SIMD128-NEXT: f32.store 4($0), $2
18251796
; NO-SIMD128-NEXT: f32.store 0($0), $2
18261797
; NO-SIMD128-NEXT: return
18271798
%res = shufflevector <4 x float> %x, <4 x float> %y,
@@ -2082,7 +2053,6 @@ define <2 x double> @shuffle_undef_v2f64(<2 x double> %x, <2 x double> %y) {
20822053
; NO-SIMD128-LABEL: shuffle_undef_v2f64:
20832054
; NO-SIMD128: .functype shuffle_undef_v2f64 (i32, f64, f64, f64, f64) -> ()
20842055
; NO-SIMD128-NEXT: # %bb.0:
2085-
; NO-SIMD128-NEXT: f64.store 8($0), $2
20862056
; NO-SIMD128-NEXT: f64.store 0($0), $2
20872057
; NO-SIMD128-NEXT: return
20882058
%res = shufflevector <2 x double> %x, <2 x double> %y,

llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -621,15 +621,14 @@ define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind {
621621
; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1
622622
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
623623
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
624-
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
625-
; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3
624+
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
626625
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3]
627626
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
628627
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
629628
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
630629
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
631630
; CHECK-SSE2-NEXT: por %xmm2, %xmm0
632-
; CHECK-SSE2-NEXT: pxor %xmm4, %xmm0
631+
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
633632
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
634633
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
635634
; CHECK-SSE2-NEXT: retq
@@ -1110,15 +1109,14 @@ define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
11101109
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
11111110
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3]
11121111
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1113-
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
1114-
; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1
1115-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,3,2,3]
1116-
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
1112+
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1113+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3]
1114+
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
11171115
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
11181116
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
11191117
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
11201118
; CHECK-SSE2-NEXT: por %xmm4, %xmm3
1121-
; CHECK-SSE2-NEXT: pxor %xmm5, %xmm3
1119+
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
11221120
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
11231121
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
11241122
; CHECK-SSE2-NEXT: pxor %xmm3, %xmm1

llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,10 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
141141
; SSE2-NEXT: pmuludq %xmm1, %xmm0
142142
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
143143
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
144-
; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
145-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
144+
; SSE2-NEXT: movl $1463, %eax # imm = 0x5B7
145+
; SSE2-NEXT: movd %eax, %xmm3
146+
; SSE2-NEXT: pmuludq %xmm1, %xmm3
147+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
146148
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
147149
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2047,2047,2047,2047]
148150
; SSE2-NEXT: movdqa %xmm0, %xmm3

llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll

Lines changed: 32 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -159,19 +159,18 @@ define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
159159
; CHECK-SSE2-LABEL: test_urem_even_allones_eq:
160160
; CHECK-SSE2: # %bb.0:
161161
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
162-
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
163-
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
164-
; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
165-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
166162
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
167163
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
168-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
169-
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
170-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
164+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
165+
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
166+
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
167+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
168+
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
171169
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
170+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
172171
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
173-
; CHECK-SSE2-NEXT: por %xmm4, %xmm0
174-
; CHECK-SSE2-NEXT: pxor %xmm2, %xmm0
172+
; CHECK-SSE2-NEXT: por %xmm2, %xmm0
173+
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
175174
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
176175
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
177176
; CHECK-SSE2-NEXT: retq
@@ -237,19 +236,18 @@ define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
237236
; CHECK-SSE2-LABEL: test_urem_even_allones_ne:
238237
; CHECK-SSE2: # %bb.0:
239238
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
240-
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
241-
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
242-
; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
243-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
244239
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
245240
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
246-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
247-
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
248-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
241+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
242+
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
243+
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
244+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
245+
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
249246
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
247+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
250248
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
251-
; CHECK-SSE2-NEXT: por %xmm4, %xmm0
252-
; CHECK-SSE2-NEXT: pxor %xmm2, %xmm0
249+
; CHECK-SSE2-NEXT: por %xmm2, %xmm0
250+
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
253251
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
254252
; CHECK-SSE2-NEXT: psrld $31, %xmm0
255253
; CHECK-SSE2-NEXT: retq
@@ -536,19 +534,18 @@ define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
536534
; CHECK-SSE2-LABEL: test_urem_even_poweroftwo:
537535
; CHECK-SSE2: # %bb.0:
538536
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
539-
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
540-
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
541-
; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
542-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
543537
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
544538
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
545-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
546-
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
547-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
539+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
540+
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
541+
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
542+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
543+
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
548544
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
545+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
549546
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
550-
; CHECK-SSE2-NEXT: por %xmm4, %xmm0
551-
; CHECK-SSE2-NEXT: pxor %xmm2, %xmm0
547+
; CHECK-SSE2-NEXT: por %xmm2, %xmm0
548+
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
552549
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
553550
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
554551
; CHECK-SSE2-NEXT: retq
@@ -968,19 +965,18 @@ define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind {
968965
; CHECK-SSE2-LABEL: test_urem_even_INT_MIN:
969966
; CHECK-SSE2: # %bb.0:
970967
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
971-
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
972-
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
973-
; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
974-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
975968
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
976969
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
977-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
978-
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
979-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
970+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
971+
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
972+
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
973+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
974+
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
980975
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
976+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
981977
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
982-
; CHECK-SSE2-NEXT: por %xmm4, %xmm0
983-
; CHECK-SSE2-NEXT: pxor %xmm2, %xmm0
978+
; CHECK-SSE2-NEXT: por %xmm2, %xmm0
979+
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
984980
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
985981
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
986982
; CHECK-SSE2-NEXT: retq

llvm/test/CodeGen/X86/vec_smulo.ll

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -474,8 +474,6 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
474474
; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
475475
; SSE2-NEXT: pand %xmm7, %xmm6
476476
; SSE2-NEXT: paddd %xmm8, %xmm6
477-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
478-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
479477
; SSE2-NEXT: pmuludq %xmm2, %xmm1
480478
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
481479
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
@@ -548,8 +546,6 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
548546
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6
549547
; SSSE3-NEXT: pand %xmm7, %xmm6
550548
; SSSE3-NEXT: paddd %xmm8, %xmm6
551-
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
552-
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
553549
; SSSE3-NEXT: pmuludq %xmm2, %xmm1
554550
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
555551
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
@@ -578,25 +574,23 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
578574
; SSE41-NEXT: movdqa %xmm0, %xmm1
579575
; SSE41-NEXT: pmuldq %xmm2, %xmm0
580576
; SSE41-NEXT: pinsrd $3, %r8d, %xmm2
581-
; SSE41-NEXT: movl {{[0-9]+}}(%rsp), %edx
577+
; SSE41-NEXT: movl {{[0-9]+}}(%rsp), %ecx
582578
; SSE41-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
583579
; SSE41-NEXT: movd %r9d, %xmm4
584580
; SSE41-NEXT: movdqa %xmm4, %xmm5
585581
; SSE41-NEXT: pmuldq %xmm3, %xmm4
586-
; SSE41-NEXT: pinsrd $1, %edx, %xmm3
587-
; SSE41-NEXT: movl {{[0-9]+}}(%rsp), %esi
588-
; SSE41-NEXT: pinsrd $1, %esi, %xmm5
582+
; SSE41-NEXT: pinsrd $1, %ecx, %xmm3
583+
; SSE41-NEXT: movl {{[0-9]+}}(%rsp), %edx
584+
; SSE41-NEXT: pinsrd $1, %edx, %xmm5
589585
; SSE41-NEXT: pmulld %xmm3, %xmm5
590586
; SSE41-NEXT: pinsrd $3, {{[0-9]+}}(%rsp), %xmm1
591-
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %rcx
592-
; SSE41-NEXT: movd %edx, %xmm3
593-
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
594-
; SSE41-NEXT: movd %esi, %xmm6
595-
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,0,0,0]
587+
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %rsi
588+
; SSE41-NEXT: movd %ecx, %xmm3
589+
; SSE41-NEXT: movd %edx, %xmm6
596590
; SSE41-NEXT: pmuldq %xmm3, %xmm6
597591
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
598592
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm6[2,3],xmm3[4,5],xmm6[6,7]
599-
; SSE41-NEXT: movq %xmm5, 16(%rcx)
593+
; SSE41-NEXT: movq %xmm5, 16(%rsi)
600594
; SSE41-NEXT: psrad $31, %xmm5
601595
; SSE41-NEXT: pcmpeqd %xmm3, %xmm5
602596
; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
@@ -607,7 +601,7 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
607601
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
608602
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm6[2,3],xmm0[4,5],xmm6[6,7]
609603
; SSE41-NEXT: pmulld %xmm2, %xmm1
610-
; SSE41-NEXT: movdqa %xmm1, (%rcx)
604+
; SSE41-NEXT: movdqa %xmm1, (%rsi)
611605
; SSE41-NEXT: psrad $31, %xmm1
612606
; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
613607
; SSE41-NEXT: pxor %xmm3, %xmm1

0 commit comments

Comments
 (0)