Skip to content

Commit 0237216

Browse files
authored
[DAG] canCreateUndefOrPoison - add EXTRACT_SUBVECTOR handling (#132745)
Similar to INSERT_SUBVECTOR: the subvector index is a constant and is always in bounds, so the extraction itself cannot introduce undef or poison.
1 parent d6976d0 commit 0237216

File tree

7 files changed

+163
-169
lines changed

7 files changed

+163
-169
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5490,6 +5490,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
54905490
case ISD::FREEZE:
54915491
case ISD::CONCAT_VECTORS:
54925492
case ISD::INSERT_SUBVECTOR:
5493+
case ISD::EXTRACT_SUBVECTOR:
54935494
case ISD::SADDSAT:
54945495
case ISD::UADDSAT:
54955496
case ISD::SSUBSAT:

llvm/test/CodeGen/X86/lower-vec-shift.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -265,9 +265,9 @@ define <16 x i16> @test11(<16 x i16> %a) {
265265
; AVX1-LABEL: test11:
266266
; AVX1: # %bb.0:
267267
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
268-
; AVX1-NEXT: vpsllw $3, %xmm1, %xmm2
269-
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm1
270-
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3,4,5],xmm2[6],xmm1[7]
268+
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2
269+
; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
270+
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3,4,5],xmm1[6],xmm2[7]
271271
; AVX1-NEXT: vpsllw $3, %xmm0, %xmm2
272272
; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0
273273
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3,4],xmm2[5,6,7]

llvm/test/CodeGen/X86/midpoint-int-vec-256.ll

Lines changed: 45 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -231,22 +231,22 @@ define <8 x i32> @vec256_i32_signed_mem_reg(ptr %a1_addr, <8 x i32> %a2) nounwin
231231
define <8 x i32> @vec256_i32_signed_reg_mem(<8 x i32> %a1, ptr %a2_addr) nounwind {
232232
; AVX1-LABEL: vec256_i32_signed_reg_mem:
233233
; AVX1: # %bb.0:
234-
; AVX1-NEXT: vmovdqa (%rdi), %xmm1
235-
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm2
236-
; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm3
237-
; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm1
238-
; AVX1-NEXT: vpsubd %xmm3, %xmm1, %xmm1
234+
; AVX1-NEXT: vmovdqa (%rdi), %ymm1
235+
; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm2
236+
; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm3
237+
; AVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
239238
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
240-
; AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm4
241-
; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
242-
; AVX1-NEXT: vpsubd %xmm4, %xmm2, %xmm2
243-
; AVX1-NEXT: vpsrld $1, %xmm2, %xmm2
239+
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
240+
; AVX1-NEXT: vpminsd %xmm1, %xmm3, %xmm4
241+
; AVX1-NEXT: vpmaxsd %xmm1, %xmm3, %xmm1
242+
; AVX1-NEXT: vpsubd %xmm4, %xmm1, %xmm1
244243
; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
245-
; AVX1-NEXT: vpmulld %xmm1, %xmm1, %xmm1
244+
; AVX1-NEXT: vpsrld $1, %xmm2, %xmm2
246245
; AVX1-NEXT: vpmulld %xmm2, %xmm2, %xmm2
247-
; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
248-
; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
249-
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
246+
; AVX1-NEXT: vpmulld %xmm1, %xmm1, %xmm1
247+
; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
248+
; AVX1-NEXT: vpaddd %xmm0, %xmm2, %xmm0
249+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
250250
; AVX1-NEXT: retq
251251
;
252252
; AVX2-LABEL: vec256_i32_signed_reg_mem:
@@ -262,18 +262,18 @@ define <8 x i32> @vec256_i32_signed_reg_mem(<8 x i32> %a1, ptr %a2_addr) nounwin
262262
;
263263
; XOP-LABEL: vec256_i32_signed_reg_mem:
264264
; XOP: # %bb.0:
265-
; XOP-NEXT: vmovdqa (%rdi), %xmm1
266-
; XOP-NEXT: vmovdqa 16(%rdi), %xmm2
267-
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
268-
; XOP-NEXT: vpminsd %xmm2, %xmm3, %xmm4
269-
; XOP-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
270-
; XOP-NEXT: vpsubd %xmm4, %xmm2, %xmm2
265+
; XOP-NEXT: vmovdqa (%rdi), %ymm1
266+
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm2
267+
; XOP-NEXT: vextractf128 $1, %ymm1, %xmm3
268+
; XOP-NEXT: vpminsd %xmm3, %xmm2, %xmm4
269+
; XOP-NEXT: vpmaxsd %xmm3, %xmm2, %xmm3
270+
; XOP-NEXT: vpsubd %xmm4, %xmm3, %xmm3
271271
; XOP-NEXT: vpminsd %xmm1, %xmm0, %xmm4
272272
; XOP-NEXT: vpmaxsd %xmm1, %xmm0, %xmm1
273273
; XOP-NEXT: vpsubd %xmm4, %xmm1, %xmm1
274274
; XOP-NEXT: vpsrld $1, %xmm1, %xmm1
275-
; XOP-NEXT: vpsrld $1, %xmm2, %xmm2
276-
; XOP-NEXT: vpmacsdd %xmm3, %xmm2, %xmm2, %xmm2
275+
; XOP-NEXT: vpsrld $1, %xmm3, %xmm3
276+
; XOP-NEXT: vpmacsdd %xmm2, %xmm3, %xmm3, %xmm2
277277
; XOP-NEXT: vpmacsdd %xmm0, %xmm1, %xmm1, %xmm0
278278
; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
279279
; XOP-NEXT: retq
@@ -303,23 +303,23 @@ define <8 x i32> @vec256_i32_signed_reg_mem(<8 x i32> %a1, ptr %a2_addr) nounwin
303303
define <8 x i32> @vec256_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
304304
; AVX1-LABEL: vec256_i32_signed_mem_mem:
305305
; AVX1: # %bb.0:
306-
; AVX1-NEXT: vmovdqa (%rsi), %xmm0
307-
; AVX1-NEXT: vmovdqa 16(%rsi), %xmm1
308-
; AVX1-NEXT: vmovdqa (%rdi), %xmm2
309-
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm3
306+
; AVX1-NEXT: vmovdqa (%rsi), %ymm0
307+
; AVX1-NEXT: vmovdqa (%rdi), %xmm1
308+
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm2
309+
; AVX1-NEXT: vpminsd %xmm0, %xmm1, %xmm3
310+
; AVX1-NEXT: vpmaxsd %xmm0, %xmm1, %xmm4
311+
; AVX1-NEXT: vpsubd %xmm3, %xmm4, %xmm3
312+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
310313
; AVX1-NEXT: vpminsd %xmm0, %xmm2, %xmm4
311314
; AVX1-NEXT: vpmaxsd %xmm0, %xmm2, %xmm0
312315
; AVX1-NEXT: vpsubd %xmm4, %xmm0, %xmm0
313-
; AVX1-NEXT: vpminsd %xmm1, %xmm3, %xmm4
314-
; AVX1-NEXT: vpmaxsd %xmm1, %xmm3, %xmm1
315-
; AVX1-NEXT: vpsubd %xmm4, %xmm1, %xmm1
316-
; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
317316
; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
317+
; AVX1-NEXT: vpsrld $1, %xmm3, %xmm3
318+
; AVX1-NEXT: vpmulld %xmm3, %xmm3, %xmm3
318319
; AVX1-NEXT: vpmulld %xmm0, %xmm0, %xmm0
319-
; AVX1-NEXT: vpmulld %xmm1, %xmm1, %xmm1
320-
; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
321320
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
322-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
321+
; AVX1-NEXT: vpaddd %xmm1, %xmm3, %xmm1
322+
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
323323
; AVX1-NEXT: retq
324324
;
325325
; AVX2-LABEL: vec256_i32_signed_mem_mem:
@@ -336,21 +336,21 @@ define <8 x i32> @vec256_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
336336
;
337337
; XOP-LABEL: vec256_i32_signed_mem_mem:
338338
; XOP: # %bb.0:
339-
; XOP-NEXT: vmovdqa (%rsi), %xmm0
340-
; XOP-NEXT: vmovdqa 16(%rsi), %xmm1
341-
; XOP-NEXT: vmovdqa (%rdi), %xmm2
342-
; XOP-NEXT: vmovdqa 16(%rdi), %xmm3
343-
; XOP-NEXT: vpminsd %xmm1, %xmm3, %xmm4
344-
; XOP-NEXT: vpmaxsd %xmm1, %xmm3, %xmm1
345-
; XOP-NEXT: vpsubd %xmm4, %xmm1, %xmm1
346-
; XOP-NEXT: vpminsd %xmm0, %xmm2, %xmm4
347-
; XOP-NEXT: vpmaxsd %xmm0, %xmm2, %xmm0
339+
; XOP-NEXT: vmovdqa (%rsi), %ymm0
340+
; XOP-NEXT: vmovdqa (%rdi), %xmm1
341+
; XOP-NEXT: vmovdqa 16(%rdi), %xmm2
342+
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
343+
; XOP-NEXT: vpminsd %xmm3, %xmm2, %xmm4
344+
; XOP-NEXT: vpmaxsd %xmm3, %xmm2, %xmm3
345+
; XOP-NEXT: vpsubd %xmm4, %xmm3, %xmm3
346+
; XOP-NEXT: vpminsd %xmm0, %xmm1, %xmm4
347+
; XOP-NEXT: vpmaxsd %xmm0, %xmm1, %xmm0
348348
; XOP-NEXT: vpsubd %xmm4, %xmm0, %xmm0
349349
; XOP-NEXT: vpsrld $1, %xmm0, %xmm0
350-
; XOP-NEXT: vpsrld $1, %xmm1, %xmm1
351-
; XOP-NEXT: vpmacsdd %xmm3, %xmm1, %xmm1, %xmm1
352-
; XOP-NEXT: vpmacsdd %xmm2, %xmm0, %xmm0, %xmm0
353-
; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
350+
; XOP-NEXT: vpsrld $1, %xmm3, %xmm3
351+
; XOP-NEXT: vpmacsdd %xmm2, %xmm3, %xmm3, %xmm2
352+
; XOP-NEXT: vpmacsdd %xmm1, %xmm0, %xmm0, %xmm0
353+
; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
354354
; XOP-NEXT: retq
355355
;
356356
; AVX512-LABEL: vec256_i32_signed_mem_mem:

llvm/test/CodeGen/X86/pr62286.ll

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -26,20 +26,17 @@ define i64 @PR62286(i32 %a) {
2626
; AVX1-LABEL: PR62286:
2727
; AVX1: # %bb.0:
2828
; AVX1-NEXT: vmovd %edi, %xmm0
29-
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
30-
; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm1
31-
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
32-
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4],ymm0[5,6,7]
33-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
34-
; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
35-
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
36-
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
29+
; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
30+
; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
31+
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
32+
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
3733
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
38-
; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0
34+
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
35+
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
36+
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
3937
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
4038
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
4139
; AVX1-NEXT: vmovq %xmm0, %rax
42-
; AVX1-NEXT: vzeroupper
4340
; AVX1-NEXT: retq
4441
;
4542
; AVX2-LABEL: PR62286:

llvm/test/CodeGen/X86/shift-i512.ll

Lines changed: 17 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -8,32 +8,24 @@
88
define <8 x i64> @shl_i512_1(<8 x i64> %a) {
99
; AVX512VL-LABEL: shl_i512_1:
1010
; AVX512VL: # %bb.0:
11-
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
12-
; AVX512VL-NEXT: vextracti32x4 $3, %zmm0, %xmm2
13-
; AVX512VL-NEXT: vextracti32x4 $2, %zmm0, %xmm3
11+
; AVX512VL-NEXT: valignq {{.*#+}} zmm1 = zmm0[3,4,5,6,7,0,1,2]
12+
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm2
13+
; AVX512VL-NEXT: vpsllq $1, %xmm0, %xmm3
14+
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
15+
; AVX512VL-NEXT: vpsrlq $63, %xmm4, %xmm4
16+
; AVX512VL-NEXT: vpaddq %xmm2, %xmm2, %xmm2
17+
; AVX512VL-NEXT: vpor %xmm4, %xmm2, %xmm2
1418
; AVX512VL-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
15-
; AVX512VL-NEXT: vpaddq %ymm2, %ymm2, %ymm4
16-
; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm3
17-
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[2,3,2,3,6,7,6,7]
18-
; AVX512VL-NEXT: vpsrlq $63, %ymm3, %ymm3
19-
; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
20-
; AVX512VL-NEXT: vpsllq $1, %xmm0, %xmm4
21-
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
22-
; AVX512VL-NEXT: vpsrlq $63, %xmm5, %xmm6
23-
; AVX512VL-NEXT: vpaddq %xmm1, %xmm1, %xmm7
24-
; AVX512VL-NEXT: vpor %xmm6, %xmm7, %xmm6
25-
; AVX512VL-NEXT: vinserti128 $1, %xmm6, %ymm4, %ymm4
26-
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
27-
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
28-
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
29-
; AVX512VL-NEXT: vpsrlq $63, %zmm0, %zmm0
30-
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
31-
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm5, %ymm1
32-
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[2,3,2,3,6,7,6,7]
33-
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
34-
; AVX512VL-NEXT: vpaddq %zmm1, %zmm1, %zmm1
35-
; AVX512VL-NEXT: vporq %zmm0, %zmm1, %zmm0
36-
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm3[0],zmm0[0],zmm3[2],zmm0[2],zmm3[4],zmm0[4],zmm3[6],zmm0[6]
19+
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
20+
; AVX512VL-NEXT: vpaddq %ymm3, %ymm3, %ymm3
21+
; AVX512VL-NEXT: vpsrlq $63, %ymm1, %ymm1
22+
; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
23+
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
24+
; AVX512VL-NEXT: vpsrlq $63, %zmm0, %zmm2
25+
; AVX512VL-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
26+
; AVX512VL-NEXT: vpaddq %zmm0, %zmm0, %zmm0
27+
; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0
28+
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm0[0],zmm1[2],zmm0[2],zmm1[4],zmm0[4],zmm1[6],zmm0[6]
3729
; AVX512VL-NEXT: retq
3830
;
3931
; AVX512VBMI-LABEL: shl_i512_1:

llvm/test/CodeGen/X86/vector-fshr-256.ll

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -39,17 +39,17 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
3939
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1
4040
; AVX1-NEXT: vandnps %ymm3, %ymm2, %ymm2
4141
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
42-
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
43-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
44-
; AVX1-NEXT: vpaddq %xmm5, %xmm5, %xmm5
45-
; AVX1-NEXT: vpsllq %xmm4, %xmm5, %xmm4
46-
; AVX1-NEXT: vpsllq %xmm3, %xmm5, %xmm3
47-
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7]
48-
; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm2[2,3,2,3]
42+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
43+
; AVX1-NEXT: vpaddq %xmm4, %xmm4, %xmm4
44+
; AVX1-NEXT: vpsllq %xmm3, %xmm4, %xmm5
45+
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
46+
; AVX1-NEXT: vpsllq %xmm3, %xmm4, %xmm3
47+
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0,1,2,3],xmm3[4,5,6,7]
4948
; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0
50-
; AVX1-NEXT: vpsllq %xmm4, %xmm0, %xmm4
49+
; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm4
50+
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
5151
; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
52-
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
52+
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7]
5353
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
5454
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
5555
; AVX1-NEXT: retq
@@ -127,23 +127,23 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
127127
; XOPAVX1-LABEL: var_funnnel_v4i64:
128128
; XOPAVX1: # %bb.0:
129129
; XOPAVX1-NEXT: vbroadcastsd {{.*#+}} ymm3 = [63,63,63,63]
130-
; XOPAVX1-NEXT: vandps %ymm3, %ymm2, %ymm4
130+
; XOPAVX1-NEXT: vandnps %ymm3, %ymm2, %ymm4
131131
; XOPAVX1-NEXT: vextractf128 $1, %ymm4, %xmm5
132-
; XOPAVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
133-
; XOPAVX1-NEXT: vpsubq %xmm5, %xmm6, %xmm5
134-
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
135-
; XOPAVX1-NEXT: vpshlq %xmm5, %xmm7, %xmm5
136-
; XOPAVX1-NEXT: vpsubq %xmm4, %xmm6, %xmm4
137-
; XOPAVX1-NEXT: vpshlq %xmm4, %xmm1, %xmm1
138-
; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1
139-
; XOPAVX1-NEXT: vandnps %ymm3, %ymm2, %ymm2
140-
; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
141-
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
142-
; XOPAVX1-NEXT: vpaddq %xmm4, %xmm4, %xmm4
143-
; XOPAVX1-NEXT: vpshlq %xmm3, %xmm4, %xmm3
132+
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
133+
; XOPAVX1-NEXT: vpaddq %xmm6, %xmm6, %xmm6
134+
; XOPAVX1-NEXT: vpshlq %xmm5, %xmm6, %xmm5
144135
; XOPAVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0
145-
; XOPAVX1-NEXT: vpshlq %xmm2, %xmm0, %xmm0
146-
; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
136+
; XOPAVX1-NEXT: vpshlq %xmm4, %xmm0, %xmm0
137+
; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
138+
; XOPAVX1-NEXT: vandps %ymm3, %ymm2, %ymm2
139+
; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
140+
; XOPAVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
141+
; XOPAVX1-NEXT: vpsubq %xmm3, %xmm4, %xmm3
142+
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
143+
; XOPAVX1-NEXT: vpshlq %xmm3, %xmm5, %xmm3
144+
; XOPAVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm2
145+
; XOPAVX1-NEXT: vpshlq %xmm2, %xmm1, %xmm1
146+
; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
147147
; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
148148
; XOPAVX1-NEXT: retq
149149
;

0 commit comments

Comments
 (0)