
Commit d92ce34

Revert faecc73 "[DAG] isSplatValue - node is a splat if all demanded elts have the same whole constant value" (#74443)
Relying on ComputeKnownBits to find a splat is causing miscompilations where a shift of zero is being assumed to give zero, but further simplification leads to a shift of zero by undef, resulting in an unexpected undef value. Fixes #78109
1 parent e4c8c58 commit d92ce34
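For context, the llvm/test/CodeGen/X86/pr78109.ll diff further down exercises the pattern the message describes: a shuffle feeding a vector lshr whose per-lane shift amount is zero in most lanes, where treating the shuffle result as a constant splat let the shift be folded incorrectly. A minimal sketch of that IR is shown below; the shufflevector and lshr lines are copied from the test, while the trailing ret is added here only so the snippet stands alone (the real test continues past the shift).

define <4 x i32> @PR78109() {
  ; Lanes selected from <7, 7, 0, 7> and zeroinitializer by mask <2, 2, 1, 1>: result is <0, 0, 7, 7>.
  %shuffle.1 = shufflevector <4 x i32> <i32 7, i32 7, i32 0, i32 7>, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
  ; Element-wise logical shift right by <0, 0, 1, 0>: the expected result is <0, 0, 3, 7>.
  %shift = lshr <4 x i32> %shuffle.1, <i32 0, i32 0, i32 1, i32 0>
  ; Hypothetical return, not part of the original test, included only for self-containment.
  ret <4 x i32> %shift
}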

14 files changed: +2364 -2337 lines

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 0 additions & 6 deletions
@@ -2881,12 +2881,6 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
     }
   }
 
-  // Fallback - this is a splat if all demanded elts are the same constant.
-  if (computeKnownBits(V, DemandedElts, Depth).isConstant()) {
-    UndefElts = ~DemandedElts;
-    return true;
-  }
-
   return false;
 }
 
llvm/test/CodeGen/ARM/vector-store.ll

Lines changed: 10 additions & 7 deletions
@@ -403,14 +403,17 @@ define void @v3i8store(ptr %p) {
 ; CHECK-LABEL: v3i8store:
 ; CHECK: @ %bb.0:
 ; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: movs r1, #0
-; CHECK-NEXT: mov r2, sp
-; CHECK-NEXT: str r1, [sp]
-; CHECK-NEXT: vld1.32 {d16[0]}, [r2:32]
-; CHECK-NEXT: strb r1, [r0, #2]
+; CHECK-NEXT: vmov.i32 d16, #0xff
+; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: vmov.i32 d17, #0x0
+; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: vand d16, d17, d16
+; CHECK-NEXT: vst1.32 {d16[0]}, [r1:32]
+; CHECK-NEXT: vld1.32 {d16[0]}, [r1:32]
 ; CHECK-NEXT: vmovl.u16 q8, d16
-; CHECK-NEXT: vmov.32 r2, d16[0]
-; CHECK-NEXT: strh r2, [r0]
+; CHECK-NEXT: strb r2, [r0, #2]
+; CHECK-NEXT: vmov.32 r1, d16[0]
+; CHECK-NEXT: strh r1, [r0]
 ; CHECK-NEXT: add sp, #4
 ; CHECK-NEXT: bx lr
   store <3 x i8> zeroinitializer, ptr %p, align 4

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

Lines changed: 5 additions & 5 deletions
@@ -244,14 +244,14 @@ define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) {
 ; RV32-NEXT: addi a0, a0, %lo(.LCPI13_0)
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vle16.v v16, (a0)
-; RV32-NEXT: vmv.v.i v20, 5
+; RV32-NEXT: vrgatherei16.vv v12, v8, v16
 ; RV32-NEXT: lui a0, %hi(.LCPI13_1)
 ; RV32-NEXT: addi a0, a0, %lo(.LCPI13_1)
-; RV32-NEXT: vle16.v v17, (a0)
-; RV32-NEXT: li a0, 115
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: li a0, 140
 ; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vrgatherei16.vv v12, v20, v16
-; RV32-NEXT: vrgatherei16.vv v12, v8, v17, v0.t
+; RV32-NEXT: vmv.v.i v16, 5
+; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t
 ; RV32-NEXT: vmv.v.v v8, v12
 ; RV32-NEXT: ret
 ;

llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll

Lines changed: 12 additions & 6 deletions
@@ -51,6 +51,11 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
 ; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; X86-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
 ; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X86-NEXT: xorps %xmm0, %xmm0
+; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X86-NEXT: mulps %xmm0, %xmm0
+; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
 ; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; X86-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
 ; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
@@ -59,10 +64,8 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
 ; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; X86-NEXT: cmpunordps %xmm0, %xmm0
 ; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
-; X86-NEXT: xorps %xmm0, %xmm0
-; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
 ; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
-; X86-NEXT: minps %xmm0, %xmm0
+; X86-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
 ; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
 ; X86-NEXT: xorps %xmm0, %xmm0
 ; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
@@ -132,6 +135,11 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
 ; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
 ; X64-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X64-NEXT: mulps %xmm0, %xmm0
+; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
 ; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
 ; X64-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
@@ -140,10 +148,8 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
 ; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; X64-NEXT: cmpunordps %xmm0, %xmm0
 ; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
-; X64-NEXT: xorps %xmm0, %xmm0
-; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
 ; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
-; X64-NEXT: minps %xmm0, %xmm0
+; X64-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
 ; X64-NEXT: xorl %ebx, %ebx
 ; X64-NEXT: xorps %xmm3, %xmm3

llvm/test/CodeGen/X86/pr78109.ll

Lines changed: 4 additions & 3 deletions
@@ -4,16 +4,17 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX
 
-; FIXME: Failure to recognise undef elements in constant foldable splats
+; Check for failure to recognise undef elements in constant foldable splats
 define <4 x i32> @PR78109() {
 ; SSE-LABEL: PR78109:
 ; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,1]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: PR78109:
 ; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [0,1,0,1]
+; AVX-NEXT: # xmm0 = mem[0,0]
 ; AVX-NEXT: retq
   %shuffle.1 = shufflevector <4 x i32> <i32 7, i32 7, i32 0, i32 7>, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 2, i32 1, i32 1> ; <0, 0, 7, 7>
   %shift = lshr <4 x i32> %shuffle.1, <i32 0, i32 0, i32 1, i32 0> ; <0, 0, 3, 7>

llvm/test/CodeGen/X86/var-permute-256.ll

Lines changed: 76 additions & 90 deletions
@@ -25,20 +25,18 @@ define <4 x i64> @var_shuffle_v4i64(<4 x i64> %v, <4 x i64> %indices) nounwind {
 ;
 ; AVX1-LABEL: var_shuffle_v4i64:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpaddq %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [2,2]
-; AVX1-NEXT: # xmm3 = mem[0,0]
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm4
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
+; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
 ; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[2,3,2,3]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT: vpermilpd %ymm1, %ymm4, %ymm2
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm4
+; AVX1-NEXT: vpermilpd %ymm4, %ymm2, %ymm2
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vblendvpd %ymm3, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vpermilpd %ymm4, %ymm0, %ymm0
+; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: var_shuffle_v4i64:
@@ -90,16 +88,15 @@ define <8 x i32> @var_shuffle_v8i32(<8 x i32> %v, <8 x i32> %indices) nounwind {
 ;
 ; AVX1-LABEL: var_shuffle_v8i32:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3,3,3,3]
-; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm0[2,3,2,3]
-; AVX1-NEXT: vpermilps %ymm1, %ymm3, %ymm3
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
+; AVX1-NEXT: vpermilps %ymm1, %ymm2, %ymm2
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vblendvps %ymm2, %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT: vblendvps %ymm1, %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; INT256-LABEL: var_shuffle_v8i32:
@@ -448,20 +445,18 @@ define <4 x double> @var_shuffle_v4f64(<4 x double> %v, <4 x i64> %indices) noun
 ;
 ; AVX1-LABEL: var_shuffle_v4f64:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpaddq %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [2,2]
-; AVX1-NEXT: # xmm3 = mem[0,0]
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm4
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
+; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
 ; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[2,3,2,3]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT: vpermilpd %ymm1, %ymm4, %ymm2
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm4
+; AVX1-NEXT: vpermilpd %ymm4, %ymm2, %ymm2
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vblendvpd %ymm3, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vpermilpd %ymm4, %ymm0, %ymm0
+; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: var_shuffle_v4f64:
@@ -513,16 +508,15 @@ define <8 x float> @var_shuffle_v8f32(<8 x float> %v, <8 x i32> %indices) nounwi
 ;
 ; AVX1-LABEL: var_shuffle_v8f32:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3,3,3,3]
-; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm0[2,3,2,3]
-; AVX1-NEXT: vpermilps %ymm1, %ymm3, %ymm3
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
+; AVX1-NEXT: vpermilps %ymm1, %ymm2, %ymm2
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vblendvps %ymm2, %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT: vblendvps %ymm1, %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; INT256-LABEL: var_shuffle_v8f32:
@@ -575,19 +569,17 @@ define <4 x i64> @var_shuffle_v4i64_from_v2i64(<2 x i64> %v, <4 x i64> %indices)
 ; AVX1-LABEL: var_shuffle_v4i64_from_v2i64:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpaddq %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [2,2]
-; AVX1-NEXT: # xmm3 = mem[0,0]
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm4
-; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpermilpd %ymm1, %ymm0, %ymm1
-; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm3
+; AVX1-NEXT: vpermilpd %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vpermilpd %ymm3, %ymm0, %ymm2
+; AVX1-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: var_shuffle_v4i64_from_v2i64:
@@ -641,15 +633,14 @@ define <8 x i32> @var_shuffle_v8i32_from_v4i32(<4 x i32> %v, <8 x i32> %indices)
 ; AVX1-LABEL: var_shuffle_v8i32_from_v4i32:
 ; AVX1: # %bb.0: # %entry
 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3,3,3,3]
-; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
-; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm3
+; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm2
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vblendvps %ymm2, %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT: vblendvps %ymm1, %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; INT256-LABEL: var_shuffle_v8i32_from_v4i32:
@@ -999,19 +990,17 @@ define <4 x double> @var_shuffle_v4f64_from_v2f64(<2 x double> %v, <4 x i64> %in
 ; AVX1-LABEL: var_shuffle_v4f64_from_v2f64:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpaddq %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [2,2]
-; AVX1-NEXT: # xmm3 = mem[0,0]
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm4
-; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpermilpd %ymm1, %ymm0, %ymm1
-; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm3
+; AVX1-NEXT: vpermilpd %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vpermilpd %ymm3, %ymm0, %ymm2
+; AVX1-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: var_shuffle_v4f64_from_v2f64:
@@ -1065,15 +1054,14 @@ define <8 x float> @var_shuffle_v8f32_from_v4f32(<4 x float> %v, <8 x i32> %indi
 ; AVX1-LABEL: var_shuffle_v8f32_from_v4f32:
 ; AVX1: # %bb.0: # %entry
 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3,3,3,3]
-; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
-; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm3
+; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm2
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vblendvps %ymm2, %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT: vblendvps %ymm1, %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; INT256-LABEL: var_shuffle_v8f32_from_v4f32:
@@ -1283,22 +1271,20 @@ define <4 x i64> @var_shuffle_v4i64_with_v16i8_indices(<4 x i64> %v, <16 x i8> %
 ;
 ; AVX1-LABEL: var_shuffle_v4i64_with_v16i8_indices:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX1-NEXT: vpsrld $16, %xmm1, %xmm1
+; AVX1-NEXT: vpsrld $16, %xmm1, %xmm2
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm0[2,3,2,3]
 ; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [2,2]
-; AVX1-NEXT: # xmm3 = mem[0,0]
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm4
 ; AVX1-NEXT: vpaddq %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[2,3,2,3]
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
-; AVX1-NEXT: vpermilpd %ymm1, %ymm4, %ymm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4
+; AVX1-NEXT: vpermilpd %ymm4, %ymm3, %ymm3
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vblendvpd %ymm3, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vpermilpd %ymm4, %ymm0, %ymm0
+; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vblendvpd %ymm1, %ymm3, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: var_shuffle_v4i64_with_v16i8_indices:
