Commit 449e3fa

[X86] combineConcatVectorOps - add concatenation handling for X86ISD::VSHLD/VSHRD funnel shift nodes (#132915)
Concat the nodes if we can merge either of the operands for free.
1 parent b246943 commit 449e3fa

2 files changed: +43 -59 lines

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 0 deletions
@@ -58572,6 +58572,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
                             Concat1 ? Concat1 : ConcatSubOperand(SrcVT, Ops, 1));
       }
       break;
+    case X86ISD::VSHLD:
+    case X86ISD::VSHRD:
     case X86ISD::PALIGNR:
       if (!IsSplat &&
           ((VT.is256BitVector() && Subtarget.hasInt256()) ||
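For context, the two new case labels simply fall through into the concatenation path that already handled X86ISD::PALIGNR. The sketch below is a hypothetical reconstruction of that shared path, inferred from the context lines in this hunk (IsSplat, ConcatSubOperand, and the Concat0/Concat1 pattern visible just above); the CombineSubOperand helper name, the useBWIRegs() guard, and the all_of check on the immediate operand are assumptions and may not match the upstream source exactly.

// Hypothetical sketch (not the verbatim upstream code) of the shared
// concatenation path that X86ISD::VSHLD / X86ISD::VSHRD now reach inside
// combineConcatVectorOps. ConcatSubOperand is assumed to rebuild the wide
// operand unconditionally, while CombineSubOperand (assumed helper name)
// only succeeds when the operand can be concatenated for free - the
// condition the commit message refers to.
case X86ISD::VSHLD:
case X86ISD::VSHRD:
case X86ISD::PALIGNR:
  if (!IsSplat &&
      ((VT.is256BitVector() && Subtarget.hasInt256()) ||
       (VT.is512BitVector() && Subtarget.useBWIRegs()))) {
    // Assumed guard: every per-lane node must use the same shift amount /
    // immediate operand before they can be merged into one wide node.
    if (llvm::all_of(Ops, [Op0](SDValue Op) {
          return Op0.getOperand(2) == Op.getOperand(2);
        })) {
      SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
      SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
      // Only widen if at least one operand merges for free; otherwise the
      // concatenation itself would cost more than it saves.
      if (Concat0 || Concat1)
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
                           Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1),
                           Op0.getOperand(2));
    }
  }
  break;

The codegen effect is what the test diff below shows: per-128-bit-lane vpshldq sequences are rebuilt as single 256-bit or 512-bit vpshldq instructions once their operands concatenate for free.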

llvm/test/CodeGen/X86/shift-i512.ll

Lines changed: 41 additions & 59 deletions
@@ -30,28 +30,20 @@ define <8 x i64> @shl_i512_1(<8 x i64> %a) {
 ;
 ; AVX512VBMI-LABEL: shl_i512_1:
 ; AVX512VBMI: # %bb.0:
-; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $1, %xmm3, %xmm4, %xmm4
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $1, %xmm2, %xmm5, %xmm6
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm4, %ymm6, %ymm4
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm6 = xmm1[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $1, %xmm1, %xmm6, %xmm7
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $1, %xmm0, %xmm8, %xmm9
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm7, %ymm9, %ymm7
-; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm4, %zmm7, %zmm4
-; AVX512VBMI-NEXT: vpshldq $1, %xmm8, %xmm1, %xmm1
-; AVX512VBMI-NEXT: vpsllq $1, %xmm0, %xmm0
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512VBMI-NEXT: vpshldq $1, %xmm5, %xmm3, %xmm1
-; AVX512VBMI-NEXT: vpshldq $1, %xmm6, %xmm2, %xmm2
+; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
+; AVX512VBMI-NEXT: vpshldq $1, %xmm3, %xmm2, %xmm3
+; AVX512VBMI-NEXT: vpsllq $1, %xmm0, %xmm4
+; AVX512VBMI-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
 ; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512VBMI-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm4[0],zmm0[2],zmm4[2],zmm0[4],zmm4[4],zmm0[6],zmm4[6]
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
+; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512VBMI-NEXT: vpshldq $1, %ymm1, %ymm2, %ymm1
+; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512VBMI-NEXT: vpshldq $1, %zmm0, %zmm2, %zmm0
+; AVX512VBMI-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm0[0],zmm1[2],zmm0[2],zmm1[4],zmm0[4],zmm1[6],zmm0[6]
 ; AVX512VBMI-NEXT: retq
 ;
 ; ZNVER4-LABEL: shl_i512_1:
@@ -131,27 +123,22 @@ define <8 x i64> @lshr_i512_1(<8 x i64> %a) {
 ;
 ; AVX512VBMI-LABEL: lshr_i512_1:
 ; AVX512VBMI: # %bb.0:
-; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm1
 ; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm3
+; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm3
 ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $63, %xmm3, %xmm4, %xmm5
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $63, %xmm2, %xmm6, %xmm7
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm5
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm7 = xmm1[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $63, %xmm1, %xmm7, %xmm8
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $63, %xmm0, %xmm9, %xmm0
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm8, %ymm0, %ymm0
-; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
-; AVX512VBMI-NEXT: vpshldq $63, %xmm7, %xmm2, %xmm2
-; AVX512VBMI-NEXT: vpshldq $63, %xmm9, %xmm1, %xmm1
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; AVX512VBMI-NEXT: vpshldq $63, %xmm6, %xmm3, %xmm2
-; AVX512VBMI-NEXT: vpsrlq $1, %xmm4, %xmm3
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
-; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512VBMI-NEXT: vpshldq $63, %xmm4, %xmm2, %xmm4
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
+; AVX512VBMI-NEXT: vpshldq $63, %xmm5, %xmm3, %xmm3
+; AVX512VBMI-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; AVX512VBMI-NEXT: vpshldq $63, %xmm2, %xmm1, %xmm2
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; AVX512VBMI-NEXT: vpsrlq $1, %xmm1, %xmm1
+; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512VBMI-NEXT: vpshldq $63, %zmm0, %zmm2, %zmm0
 ; AVX512VBMI-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
 ; AVX512VBMI-NEXT: retq
 ;
@@ -251,27 +238,22 @@ define <8 x i64> @ashr_i512_1(<8 x i64> %a) {
 ;
 ; AVX512VBMI-LABEL: ashr_i512_1:
 ; AVX512VBMI: # %bb.0:
-; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm1
 ; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm3
+; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm3
 ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $63, %xmm3, %xmm4, %xmm5
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $63, %xmm2, %xmm6, %xmm7
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm5
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm7 = xmm1[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $63, %xmm1, %xmm7, %xmm8
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $63, %xmm0, %xmm9, %xmm0
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm8, %ymm0, %ymm0
-; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
-; AVX512VBMI-NEXT: vpshldq $63, %xmm7, %xmm2, %xmm2
-; AVX512VBMI-NEXT: vpshldq $63, %xmm9, %xmm1, %xmm1
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; AVX512VBMI-NEXT: vpshldq $63, %xmm6, %xmm3, %xmm2
-; AVX512VBMI-NEXT: vpsraq $1, %xmm4, %xmm3
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
-; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512VBMI-NEXT: vpshldq $63, %xmm4, %xmm2, %xmm4
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
+; AVX512VBMI-NEXT: vpshldq $63, %xmm5, %xmm3, %xmm3
+; AVX512VBMI-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; AVX512VBMI-NEXT: vpshldq $63, %xmm2, %xmm1, %xmm2
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; AVX512VBMI-NEXT: vpsraq $1, %xmm1, %xmm1
+; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512VBMI-NEXT: vpshldq $63, %zmm0, %zmm2, %zmm0
 ; AVX512VBMI-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
 ; AVX512VBMI-NEXT: retq
 ;
