Skip to content

Commit 5e79ae6

Browse files
authored
DAG: Fix vector_shuffle -> splat fold defining undef lanes (#123596)
For shuffle vector splats with undef lanes in the mask, the splat -> build_vector fold was introducing real values into those undef lanes. Replace the build_vector operands with undef wherever the corresponding mask element is undef. This avoids AMDGPU test regressions in a future change. The output for test/CodeGen/X86/urem-seteq-illegal-types.ll looks worse, but I didn't investigate.
1 parent e3c16e0 commit 5e79ae6

File tree

9 files changed

+1038
-641
lines changed

9 files changed

+1038
-641
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26386,9 +26386,17 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
2638626386
if (AllSame)
2638726387
return N0;
2638826388

26389-
// Canonicalize any other splat as a build_vector.
26389+
// Canonicalize any other splat as a build_vector, but avoid defining any
26390+
// undefined elements in the mask.
2639026391
SDValue Splatted = V->getOperand(SplatIndex);
2639126392
SmallVector<SDValue, 8> Ops(NumElts, Splatted);
26393+
EVT EltVT = Splatted.getValueType();
26394+
26395+
for (unsigned i = 0; i != NumElts; ++i) {
26396+
if (SVN->getMaskElt(i) < 0)
26397+
Ops[i] = DAG.getUNDEF(EltVT);
26398+
}
26399+
2639226400
SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
2639326401

2639426402
// We may have jumped through bitcasts, so the type of the

llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll

Lines changed: 500 additions & 287 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll

Lines changed: 500 additions & 287 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/PowerPC/vector-reduce-fadd.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3628,31 +3628,31 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
36283628
; PWR9LE-LABEL: v2ppcf128_fast:
36293629
; PWR9LE: # %bb.0: # %entry
36303630
; PWR9LE-NEXT: mflr r0
3631-
; PWR9LE-NEXT: stdu r1, -64(r1)
3632-
; PWR9LE-NEXT: std r0, 80(r1)
3631+
; PWR9LE-NEXT: stdu r1, -48(r1)
3632+
; PWR9LE-NEXT: std r0, 64(r1)
36333633
; PWR9LE-NEXT: bl __gcc_qadd
36343634
; PWR9LE-NEXT: nop
36353635
; PWR9LE-NEXT: stfd f2, 40(r1)
36363636
; PWR9LE-NEXT: stfd f1, 32(r1)
36373637
; PWR9LE-NEXT: lxv vs1, 32(r1)
36383638
; PWR9LE-NEXT: xxswapd vs2, vs1
3639-
; PWR9LE-NEXT: addi r1, r1, 64
3639+
; PWR9LE-NEXT: addi r1, r1, 48
36403640
; PWR9LE-NEXT: ld r0, 16(r1)
36413641
; PWR9LE-NEXT: mtlr r0
36423642
; PWR9LE-NEXT: blr
36433643
;
36443644
; PWR9BE-LABEL: v2ppcf128_fast:
36453645
; PWR9BE: # %bb.0: # %entry
36463646
; PWR9BE-NEXT: mflr r0
3647-
; PWR9BE-NEXT: stdu r1, -144(r1)
3648-
; PWR9BE-NEXT: std r0, 160(r1)
3647+
; PWR9BE-NEXT: stdu r1, -128(r1)
3648+
; PWR9BE-NEXT: std r0, 144(r1)
36493649
; PWR9BE-NEXT: bl __gcc_qadd
36503650
; PWR9BE-NEXT: nop
36513651
; PWR9BE-NEXT: stfd f2, 120(r1)
36523652
; PWR9BE-NEXT: stfd f1, 112(r1)
36533653
; PWR9BE-NEXT: lxv vs1, 112(r1)
36543654
; PWR9BE-NEXT: xxswapd vs2, vs1
3655-
; PWR9BE-NEXT: addi r1, r1, 144
3655+
; PWR9BE-NEXT: addi r1, r1, 128
36563656
; PWR9BE-NEXT: ld r0, 16(r1)
36573657
; PWR9BE-NEXT: mtlr r0
36583658
; PWR9BE-NEXT: blr
@@ -3661,13 +3661,13 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
36613661
; PWR10LE: # %bb.0: # %entry
36623662
; PWR10LE-NEXT: mflr r0
36633663
; PWR10LE-NEXT: std r0, 16(r1)
3664-
; PWR10LE-NEXT: stdu r1, -64(r1)
3664+
; PWR10LE-NEXT: stdu r1, -48(r1)
36653665
; PWR10LE-NEXT: bl __gcc_qadd@notoc
36663666
; PWR10LE-NEXT: stfd f2, 40(r1)
36673667
; PWR10LE-NEXT: stfd f1, 32(r1)
36683668
; PWR10LE-NEXT: lxv vs1, 32(r1)
36693669
; PWR10LE-NEXT: xxswapd vs2, vs1
3670-
; PWR10LE-NEXT: addi r1, r1, 64
3670+
; PWR10LE-NEXT: addi r1, r1, 48
36713671
; PWR10LE-NEXT: ld r0, 16(r1)
36723672
; PWR10LE-NEXT: mtlr r0
36733673
; PWR10LE-NEXT: blr
@@ -3676,14 +3676,14 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
36763676
; PWR10BE: # %bb.0: # %entry
36773677
; PWR10BE-NEXT: mflr r0
36783678
; PWR10BE-NEXT: std r0, 16(r1)
3679-
; PWR10BE-NEXT: stdu r1, -144(r1)
3679+
; PWR10BE-NEXT: stdu r1, -128(r1)
36803680
; PWR10BE-NEXT: bl __gcc_qadd
36813681
; PWR10BE-NEXT: nop
36823682
; PWR10BE-NEXT: stfd f2, 120(r1)
36833683
; PWR10BE-NEXT: stfd f1, 112(r1)
36843684
; PWR10BE-NEXT: lxv vs1, 112(r1)
36853685
; PWR10BE-NEXT: xxswapd vs2, vs1
3686-
; PWR10BE-NEXT: addi r1, r1, 144
3686+
; PWR10BE-NEXT: addi r1, r1, 128
36873687
; PWR10BE-NEXT: ld r0, 16(r1)
36883688
; PWR10BE-NEXT: mtlr r0
36893689
; PWR10BE-NEXT: blr

llvm/test/CodeGen/WebAssembly/simd.ll

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -481,21 +481,6 @@ define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) {
481481
; NO-SIMD128-LABEL: shuffle_undef_v16i8:
482482
; NO-SIMD128: .functype shuffle_undef_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
483483
; NO-SIMD128-NEXT: # %bb.0:
484-
; NO-SIMD128-NEXT: i32.store8 15($0), $2
485-
; NO-SIMD128-NEXT: i32.store8 14($0), $2
486-
; NO-SIMD128-NEXT: i32.store8 13($0), $2
487-
; NO-SIMD128-NEXT: i32.store8 12($0), $2
488-
; NO-SIMD128-NEXT: i32.store8 11($0), $2
489-
; NO-SIMD128-NEXT: i32.store8 10($0), $2
490-
; NO-SIMD128-NEXT: i32.store8 9($0), $2
491-
; NO-SIMD128-NEXT: i32.store8 8($0), $2
492-
; NO-SIMD128-NEXT: i32.store8 7($0), $2
493-
; NO-SIMD128-NEXT: i32.store8 6($0), $2
494-
; NO-SIMD128-NEXT: i32.store8 5($0), $2
495-
; NO-SIMD128-NEXT: i32.store8 4($0), $2
496-
; NO-SIMD128-NEXT: i32.store8 3($0), $2
497-
; NO-SIMD128-NEXT: i32.store8 2($0), $2
498-
; NO-SIMD128-NEXT: i32.store8 1($0), $2
499484
; NO-SIMD128-NEXT: i32.store8 0($0), $2
500485
; NO-SIMD128-NEXT: return
501486
%res = shufflevector <16 x i8> %x, <16 x i8> %y,
@@ -994,13 +979,6 @@ define <8 x i16> @shuffle_undef_v8i16(<8 x i16> %x, <8 x i16> %y) {
994979
; NO-SIMD128-LABEL: shuffle_undef_v8i16:
995980
; NO-SIMD128: .functype shuffle_undef_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
996981
; NO-SIMD128-NEXT: # %bb.0:
997-
; NO-SIMD128-NEXT: i32.store16 14($0), $2
998-
; NO-SIMD128-NEXT: i32.store16 12($0), $2
999-
; NO-SIMD128-NEXT: i32.store16 10($0), $2
1000-
; NO-SIMD128-NEXT: i32.store16 8($0), $2
1001-
; NO-SIMD128-NEXT: i32.store16 6($0), $2
1002-
; NO-SIMD128-NEXT: i32.store16 4($0), $2
1003-
; NO-SIMD128-NEXT: i32.store16 2($0), $2
1004982
; NO-SIMD128-NEXT: i32.store16 0($0), $2
1005983
; NO-SIMD128-NEXT: return
1006984
%res = shufflevector <8 x i16> %x, <8 x i16> %y,
@@ -1288,9 +1266,6 @@ define <4 x i32> @shuffle_undef_v4i32(<4 x i32> %x, <4 x i32> %y) {
12881266
; NO-SIMD128-LABEL: shuffle_undef_v4i32:
12891267
; NO-SIMD128: .functype shuffle_undef_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
12901268
; NO-SIMD128-NEXT: # %bb.0:
1291-
; NO-SIMD128-NEXT: i32.store 12($0), $2
1292-
; NO-SIMD128-NEXT: i32.store 8($0), $2
1293-
; NO-SIMD128-NEXT: i32.store 4($0), $2
12941269
; NO-SIMD128-NEXT: i32.store 0($0), $2
12951270
; NO-SIMD128-NEXT: return
12961271
%res = shufflevector <4 x i32> %x, <4 x i32> %y,
@@ -1550,7 +1525,6 @@ define <2 x i64> @shuffle_undef_v2i64(<2 x i64> %x, <2 x i64> %y) {
15501525
; NO-SIMD128-LABEL: shuffle_undef_v2i64:
15511526
; NO-SIMD128: .functype shuffle_undef_v2i64 (i32, i64, i64, i64, i64) -> ()
15521527
; NO-SIMD128-NEXT: # %bb.0:
1553-
; NO-SIMD128-NEXT: i64.store 8($0), $2
15541528
; NO-SIMD128-NEXT: i64.store 0($0), $2
15551529
; NO-SIMD128-NEXT: return
15561530
%res = shufflevector <2 x i64> %x, <2 x i64> %y,
@@ -1819,9 +1793,6 @@ define <4 x float> @shuffle_undef_v4f32(<4 x float> %x, <4 x float> %y) {
18191793
; NO-SIMD128-LABEL: shuffle_undef_v4f32:
18201794
; NO-SIMD128: .functype shuffle_undef_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
18211795
; NO-SIMD128-NEXT: # %bb.0:
1822-
; NO-SIMD128-NEXT: f32.store 12($0), $2
1823-
; NO-SIMD128-NEXT: f32.store 8($0), $2
1824-
; NO-SIMD128-NEXT: f32.store 4($0), $2
18251796
; NO-SIMD128-NEXT: f32.store 0($0), $2
18261797
; NO-SIMD128-NEXT: return
18271798
%res = shufflevector <4 x float> %x, <4 x float> %y,
@@ -2082,7 +2053,6 @@ define <2 x double> @shuffle_undef_v2f64(<2 x double> %x, <2 x double> %y) {
20822053
; NO-SIMD128-LABEL: shuffle_undef_v2f64:
20832054
; NO-SIMD128: .functype shuffle_undef_v2f64 (i32, f64, f64, f64, f64) -> ()
20842055
; NO-SIMD128-NEXT: # %bb.0:
2085-
; NO-SIMD128-NEXT: f64.store 8($0), $2
20862056
; NO-SIMD128-NEXT: f64.store 0($0), $2
20872057
; NO-SIMD128-NEXT: return
20882058
%res = shufflevector <2 x double> %x, <2 x double> %y,

llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,10 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
141141
; SSE2-NEXT: pmuludq %xmm1, %xmm0
142142
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
143143
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
144-
; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
145-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
144+
; SSE2-NEXT: movl $1463, %eax # imm = 0x5B7
145+
; SSE2-NEXT: movd %eax, %xmm3
146+
; SSE2-NEXT: pmuludq %xmm1, %xmm3
147+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
146148
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
147149
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2047,2047,2047,2047]
148150
; SSE2-NEXT: movdqa %xmm0, %xmm3

llvm/test/CodeGen/X86/vec_smulo.ll

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -474,8 +474,6 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
474474
; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
475475
; SSE2-NEXT: pand %xmm7, %xmm6
476476
; SSE2-NEXT: paddd %xmm8, %xmm6
477-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
478-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
479477
; SSE2-NEXT: pmuludq %xmm2, %xmm1
480478
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
481479
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
@@ -548,8 +546,6 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
548546
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6
549547
; SSSE3-NEXT: pand %xmm7, %xmm6
550548
; SSSE3-NEXT: paddd %xmm8, %xmm6
551-
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
552-
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
553549
; SSSE3-NEXT: pmuludq %xmm2, %xmm1
554550
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
555551
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
@@ -578,25 +574,23 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
578574
; SSE41-NEXT: movdqa %xmm0, %xmm1
579575
; SSE41-NEXT: pmuldq %xmm2, %xmm0
580576
; SSE41-NEXT: pinsrd $3, %r8d, %xmm2
581-
; SSE41-NEXT: movl {{[0-9]+}}(%rsp), %edx
577+
; SSE41-NEXT: movl {{[0-9]+}}(%rsp), %ecx
582578
; SSE41-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
583579
; SSE41-NEXT: movd %r9d, %xmm4
584580
; SSE41-NEXT: movdqa %xmm4, %xmm5
585581
; SSE41-NEXT: pmuldq %xmm3, %xmm4
586-
; SSE41-NEXT: pinsrd $1, %edx, %xmm3
587-
; SSE41-NEXT: movl {{[0-9]+}}(%rsp), %esi
588-
; SSE41-NEXT: pinsrd $1, %esi, %xmm5
582+
; SSE41-NEXT: pinsrd $1, %ecx, %xmm3
583+
; SSE41-NEXT: movl {{[0-9]+}}(%rsp), %edx
584+
; SSE41-NEXT: pinsrd $1, %edx, %xmm5
589585
; SSE41-NEXT: pmulld %xmm3, %xmm5
590586
; SSE41-NEXT: pinsrd $3, {{[0-9]+}}(%rsp), %xmm1
591-
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %rcx
592-
; SSE41-NEXT: movd %edx, %xmm3
593-
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
594-
; SSE41-NEXT: movd %esi, %xmm6
595-
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,0,0,0]
587+
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %rsi
588+
; SSE41-NEXT: movd %ecx, %xmm3
589+
; SSE41-NEXT: movd %edx, %xmm6
596590
; SSE41-NEXT: pmuldq %xmm3, %xmm6
597591
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
598592
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm6[2,3],xmm3[4,5],xmm6[6,7]
599-
; SSE41-NEXT: movq %xmm5, 16(%rcx)
593+
; SSE41-NEXT: movq %xmm5, 16(%rsi)
600594
; SSE41-NEXT: psrad $31, %xmm5
601595
; SSE41-NEXT: pcmpeqd %xmm3, %xmm5
602596
; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
@@ -607,7 +601,7 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
607601
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
608602
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm6[2,3],xmm0[4,5],xmm6[6,7]
609603
; SSE41-NEXT: pmulld %xmm2, %xmm1
610-
; SSE41-NEXT: movdqa %xmm1, (%rcx)
604+
; SSE41-NEXT: movdqa %xmm1, (%rsi)
611605
; SSE41-NEXT: psrad $31, %xmm1
612606
; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
613607
; SSE41-NEXT: pxor %xmm3, %xmm1

llvm/test/CodeGen/X86/vec_umulo.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -394,8 +394,8 @@ define <6 x i32> @umulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
394394
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
395395
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
396396
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
397-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = mem[0,0,0,0]
398-
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = mem[0,0,0,0]
397+
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
398+
; SSE2-NEXT: movd {{.*#+}} xmm6 = mem[0],zero,zero,zero
399399
; SSE2-NEXT: pmuludq %xmm2, %xmm6
400400
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,3,2,3]
401401
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,3,2,3]
@@ -444,8 +444,8 @@ define <6 x i32> @umulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
444444
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
445445
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
446446
; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
447-
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = mem[0,0,0,0]
448-
; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = mem[0,0,0,0]
447+
; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
448+
; SSSE3-NEXT: movd {{.*#+}} xmm6 = mem[0],zero,zero,zero
449449
; SSSE3-NEXT: pmuludq %xmm2, %xmm6
450450
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,3,2,3]
451451
; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,3,2,3]
@@ -492,9 +492,7 @@ define <6 x i32> @umulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
492492
; SSE41-NEXT: pcmpeqd %xmm6, %xmm6
493493
; SSE41-NEXT: pxor %xmm6, %xmm3
494494
; SSE41-NEXT: movd %edi, %xmm7
495-
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm7[0,0,0,0]
496495
; SSE41-NEXT: movd %r9d, %xmm8
497-
; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm8[0,0,0,0]
498496
; SSE41-NEXT: pmuludq %xmm7, %xmm8
499497
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
500498
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm8[2,3],xmm1[4,5],xmm8[6,7]

llvm/test/CodeGen/X86/widen_shuffle-1.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,14 +105,13 @@ define void @shuf5(ptr %p) nounwind {
105105
; X86-LABEL: shuf5:
106106
; X86: # %bb.0:
107107
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
108-
; X86-NEXT: movsd {{.*#+}} xmm0 = [33,33,33,33,33,33,33,33,0,0,0,0,0,0,0,0]
108+
; X86-NEXT: movsd {{.*#+}} xmm0 = [33,33,u,u,u,u,u,u,0,0,u,u,u,u,u,u]
109109
; X86-NEXT: movsd %xmm0, (%eax)
110110
; X86-NEXT: retl
111111
;
112112
; X64-LABEL: shuf5:
113113
; X64: # %bb.0:
114-
; X64-NEXT: movabsq $2387225703656530209, %rax # imm = 0x2121212121212121
115-
; X64-NEXT: movq %rax, (%rdi)
114+
; X64-NEXT: movq $8481, (%rdi) # imm = 0x2121
116115
; X64-NEXT: retq
117116
%v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> poison, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
118117
store <8 x i8> %v, ptr %p, align 8

0 commit comments

Comments
 (0)