Commit faecc73

[DAG] isSplatValue - node is a splat if all demanded elts have the same whole constant value (llvm#74443)
1 parent bdacd56 commit faecc73

13 files changed: +2325, -2351 lines
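
In short (my summary, not text from the commit): SelectionDAG::isSplatValue() previously returned false once a node failed to match the explicitly recognized splat patterns. The patch adds a fallback that asks known-bits analysis whether every demanded element holds the same fully-known constant; if so, the node is reported as a splat, with all undemanded lanes marked undef. A minimal sketch of that lane bookkeeping, using the stock llvm::APInt API with hypothetical lane counts:

#include "llvm/ADT/APInt.h"
using llvm::APInt;

int main() {
  // Hypothetical: 8 vector lanes, of which only the low 4 are demanded.
  APInt DemandedElts = APInt::getLowBitsSet(8, 4); // lanes 0..3
  // The fallback reports every lane it was not asked about as undef,
  // mirroring `UndefElts = ~DemandedElts` in the patch.
  APInt UndefElts = ~DemandedElts;                 // lanes 4..7
  return UndefElts == APInt::getHighBitsSet(8, 4) ? 0 : 1;
}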

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 6 additions & 0 deletions

@@ -2881,6 +2881,12 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
     }
   }
 
+  // Fallback - this is a splat if all demanded elts are the same constant.
+  if (computeKnownBits(V, DemandedElts, Depth).isConstant()) {
+    UndefElts = ~DemandedElts;
+    return true;
+  }
+
   return false;
 }

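The guard here is KnownBits::isConstant(): it returns true only when the Zero and One masks together pin down every bit, so the fallback fires for a single whole constant value and never for a partially-known one. A sketch of that predicate (my paraphrase of the stock llvm::KnownBits API, not code from this commit):

#include "llvm/Support/KnownBits.h"

// The value is one whole constant iff every bit is known zero or known one.
static bool isWholeConstant(const llvm::KnownBits &Known) {
  return Known.Zero.popcount() + Known.One.popcount() == Known.getBitWidth();
}
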
llvm/test/CodeGen/ARM/vector-store.ll

Lines changed: 7 additions & 10 deletions

@@ -403,17 +403,14 @@ define void @v3i8store(ptr %p) {
 ; CHECK-LABEL: v3i8store:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    sub sp, #4
-; CHECK-NEXT:    vmov.i32 d16, #0xff
-; CHECK-NEXT:    mov r1, sp
-; CHECK-NEXT:    vmov.i32 d17, #0x0
-; CHECK-NEXT:    movs r2, #0
-; CHECK-NEXT:    vand d16, d17, d16
-; CHECK-NEXT:    vst1.32 {d16[0]}, [r1:32]
-; CHECK-NEXT:    vld1.32 {d16[0]}, [r1:32]
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    str r1, [sp]
+; CHECK-NEXT:    vld1.32 {d16[0]}, [r2:32]
+; CHECK-NEXT:    strb r1, [r0, #2]
 ; CHECK-NEXT:    vmovl.u16 q8, d16
-; CHECK-NEXT:    strb r2, [r0, #2]
-; CHECK-NEXT:    vmov.32 r1, d16[0]
-; CHECK-NEXT:    strh r1, [r0]
+; CHECK-NEXT:    vmov.32 r2, d16[0]
+; CHECK-NEXT:    strh r2, [r0]
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    bx lr
   store <3 x i8> zeroinitializer, ptr %p, align 4

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

Lines changed: 5 additions & 5 deletions

@@ -244,14 +244,14 @@ define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) {
 ; RV32-NEXT:    addi a0, a0, %lo(.LCPI13_0)
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vle16.v v16, (a0)
-; RV32-NEXT:    vrgatherei16.vv v12, v8, v16
+; RV32-NEXT:    vmv.v.i v20, 5
 ; RV32-NEXT:    lui a0, %hi(.LCPI13_1)
 ; RV32-NEXT:    addi a0, a0, %lo(.LCPI13_1)
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    li a0, 140
+; RV32-NEXT:    vle16.v v17, (a0)
+; RV32-NEXT:    li a0, 115
 ; RV32-NEXT:    vmv.s.x v0, a0
-; RV32-NEXT:    vmv.v.i v16, 5
-; RV32-NEXT:    vrgatherei16.vv v12, v16, v8, v0.t
+; RV32-NEXT:    vrgatherei16.vv v12, v20, v16
+; RV32-NEXT:    vrgatherei16.vv v12, v8, v17, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v12
 ; RV32-NEXT:    ret
 ;

llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll

Lines changed: 6 additions & 12 deletions

@@ -51,11 +51,6 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
 ; X86-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; X86-NEXT:    mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
 ; X86-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
-; X86-NEXT:    xorps %xmm0, %xmm0
-; X86-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
-; X86-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
-; X86-NEXT:    mulps %xmm0, %xmm0
-; X86-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
 ; X86-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; X86-NEXT:    mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
 ; X86-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
@@ -64,8 +59,10 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
 ; X86-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; X86-NEXT:    cmpunordps %xmm0, %xmm0
 ; X86-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X86-NEXT:    xorps %xmm0, %xmm0
+; X86-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
 ; X86-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
-; X86-NEXT:    minps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT:    minps %xmm0, %xmm0
 ; X86-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
 ; X86-NEXT:    xorps %xmm0, %xmm0
 ; X86-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp)
@@ -135,11 +132,6 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
 ; X64-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload
 ; X64-NEXT:    mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; X64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
-; X64-NEXT:    xorps %xmm0, %xmm0
-; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
-; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
-; X64-NEXT:    mulps %xmm0, %xmm0
-; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
 ; X64-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload
 ; X64-NEXT:    mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; X64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
@@ -148,8 +140,10 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
 ; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; X64-NEXT:    cmpunordps %xmm0, %xmm0
 ; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
 ; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
-; X64-NEXT:    minps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT:    minps %xmm0, %xmm0
 ; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
 ; X64-NEXT:    xorl %ebx, %ebx
 ; X64-NEXT:    xorps %xmm3, %xmm3

llvm/test/CodeGen/X86/var-permute-256.ll

Lines changed: 90 additions & 76 deletions

@@ -25,18 +25,20 @@ define <4 x i64> @var_shuffle_v4i64(<4 x i64> %v, <4 x i64> %indices) nounwind {
 ;
 ; AVX1-LABEL: var_shuffle_v4i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
-; AVX1-NEXT:    vpaddq %xmm1, %xmm1, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpaddq %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [2,2]
+; AVX1-NEXT:    # xmm3 = mem[0,0]
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm4
 ; AVX1-NEXT:    vpaddq %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm4
-; AVX1-NEXT:    vpermilpd %ymm4, %ymm2, %ymm2
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vpermilpd %ymm1, %ymm4, %ymm2
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT:    vpermilpd %ymm4, %ymm0, %ymm0
-; AVX1-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
-; AVX1-NEXT:    vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vblendvpd %ymm3, %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: var_shuffle_v4i64:
@@ -88,15 +90,16 @@ define <8 x i32> @var_shuffle_v8i32(<8 x i32> %v, <8 x i32> %indices) nounwind {
 ;
 ; AVX1-LABEL: var_shuffle_v8i32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
-; AVX1-NEXT:    vpermilps %ymm1, %ymm2, %ymm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [3,3,3,3]
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm3 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vpermilps %ymm1, %ymm3, %ymm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT:    vpermilps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
-; AVX1-NEXT:    vblendvps %ymm1, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vblendvps %ymm2, %ymm3, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; INT256-LABEL: var_shuffle_v8i32:
@@ -445,18 +448,20 @@ define <4 x double> @var_shuffle_v4f64(<4 x double> %v, <4 x i64> %indices) noun
 ;
 ; AVX1-LABEL: var_shuffle_v4f64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
-; AVX1-NEXT:    vpaddq %xmm1, %xmm1, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpaddq %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [2,2]
+; AVX1-NEXT:    # xmm3 = mem[0,0]
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm4
 ; AVX1-NEXT:    vpaddq %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm4
-; AVX1-NEXT:    vpermilpd %ymm4, %ymm2, %ymm2
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vpermilpd %ymm1, %ymm4, %ymm2
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT:    vpermilpd %ymm4, %ymm0, %ymm0
-; AVX1-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
-; AVX1-NEXT:    vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vblendvpd %ymm3, %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: var_shuffle_v4f64:
@@ -508,15 +513,16 @@ define <8 x float> @var_shuffle_v8f32(<8 x float> %v, <8 x i32> %indices) nounwi
 ;
 ; AVX1-LABEL: var_shuffle_v8f32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
-; AVX1-NEXT:    vpermilps %ymm1, %ymm2, %ymm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [3,3,3,3]
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm3 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vpermilps %ymm1, %ymm3, %ymm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT:    vpermilps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
-; AVX1-NEXT:    vblendvps %ymm1, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vblendvps %ymm2, %ymm3, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; INT256-LABEL: var_shuffle_v8f32:
@@ -569,17 +575,19 @@ define <4 x i64> @var_shuffle_v4i64_from_v2i64(<2 x i64> %v, <4 x i64> %indices)
 ; AVX1-LABEL: var_shuffle_v4i64_from_v2i64:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT:    vpaddq %xmm1, %xmm1, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpaddq %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [2,2]
+; AVX1-NEXT:    # xmm3 = mem[0,0]
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm4
 ; AVX1-NEXT:    vpaddq %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm3
-; AVX1-NEXT:    vpermilpd %ymm3, %ymm0, %ymm0
-; AVX1-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
-; AVX1-NEXT:    vpermilpd %ymm3, %ymm0, %ymm2
-; AVX1-NEXT:    vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpermilpd %ymm1, %ymm0, %ymm1
+; AVX1-NEXT:    vblendvpd %ymm3, %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: var_shuffle_v4i64_from_v2i64:
@@ -633,14 +641,15 @@ define <8 x i32> @var_shuffle_v8i32_from_v4i32(<4 x i32> %v, <8 x i32> %indices)
 ; AVX1-LABEL: var_shuffle_v8i32_from_v4i32:
 ; AVX1:       # %bb.0: # %entry
 ; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT:    vpermilps %ymm1, %ymm0, %ymm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [3,3,3,3]
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT:    vpermilps %ymm1, %ymm0, %ymm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT:    vpermilps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
-; AVX1-NEXT:    vblendvps %ymm1, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vblendvps %ymm2, %ymm3, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; INT256-LABEL: var_shuffle_v8i32_from_v4i32:
@@ -990,17 +999,19 @@ define <4 x double> @var_shuffle_v4f64_from_v2f64(<2 x double> %v, <4 x i64> %in
 ; AVX1-LABEL: var_shuffle_v4f64_from_v2f64:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT:    vpaddq %xmm1, %xmm1, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpaddq %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [2,2]
+; AVX1-NEXT:    # xmm3 = mem[0,0]
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm4
 ; AVX1-NEXT:    vpaddq %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm3
-; AVX1-NEXT:    vpermilpd %ymm3, %ymm0, %ymm0
-; AVX1-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
-; AVX1-NEXT:    vpermilpd %ymm3, %ymm0, %ymm2
-; AVX1-NEXT:    vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpermilpd %ymm1, %ymm0, %ymm1
+; AVX1-NEXT:    vblendvpd %ymm3, %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: var_shuffle_v4f64_from_v2f64:
@@ -1054,14 +1065,15 @@ define <8 x float> @var_shuffle_v8f32_from_v4f32(<4 x float> %v, <8 x i32> %indi
 ; AVX1-LABEL: var_shuffle_v8f32_from_v4f32:
 ; AVX1:       # %bb.0: # %entry
 ; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT:    vpermilps %ymm1, %ymm0, %ymm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [3,3,3,3]
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT:    vpermilps %ymm1, %ymm0, %ymm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT:    vpermilps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
-; AVX1-NEXT:    vblendvps %ymm1, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vblendvps %ymm2, %ymm3, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; INT256-LABEL: var_shuffle_v8f32_from_v4f32:
@@ -1271,20 +1283,22 @@ define <4 x i64> @var_shuffle_v4i64_with_v16i8_indices(<4 x i64> %v, <16 x i8> %
 ;
 ; AVX1-LABEL: var_shuffle_v4i64_with_v16i8_indices:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpsrld $16, %xmm1, %xmm2
-; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vpsrld $16, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm3 = ymm0[2,3,2,3]
 ; AVX1-NEXT:    vpaddq %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [2,2]
+; AVX1-NEXT:    # xmm3 = mem[0,0]
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm4
 ; AVX1-NEXT:    vpaddq %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm4
-; AVX1-NEXT:    vpermilpd %ymm4, %ymm3, %ymm3
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT:    vpermilpd %ymm1, %ymm4, %ymm2
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT:    vpermilpd %ymm4, %ymm0, %ymm0
-; AVX1-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vblendvpd %ymm1, %ymm3, %ymm0, %ymm0
+; AVX1-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vblendvpd %ymm3, %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: var_shuffle_v4i64_with_v16i8_indices:
