Skip to content

Commit 02cf97f

Browse files
authored
[X86] combineConcatVectorOps - always concatenate integer binops with duplicated operands (#132735)
When both operands of an integer binop are the same value, only a single operand needs to be concatenated, so treat it like a unary op.
1 parent 5dd655e commit 02cf97f

File tree

2 files changed

+22
-27
lines changed

2 files changed

+22
-27
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58510,7 +58510,9 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5851058510
(EltSizeInBits >= 32 || Subtarget.useBWIRegs())))) {
5851158511
SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
5851258512
SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
58513-
if (Concat0 || Concat1)
58513+
if (Concat0 || Concat1 || llvm::all_of(Ops, [](SDValue Op) {
58514+
return Op.getOperand(0) == Op.getOperand(1);
58515+
}))
5851458516
return DAG.getNode(Opcode, DL, VT,
5851558517
Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
5851658518
Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));

llvm/test/CodeGen/X86/shift-i512.ll

Lines changed: 19 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -11,36 +11,29 @@ define <8 x i64> @shl_i512_1(<8 x i64> %a) {
1111
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
1212
; AVX512VL-NEXT: vextracti32x4 $3, %zmm0, %xmm2
1313
; AVX512VL-NEXT: vextracti32x4 $2, %zmm0, %xmm3
14-
; AVX512VL-NEXT: vpaddq %xmm3, %xmm3, %xmm4
15-
; AVX512VL-NEXT: vpaddq %xmm2, %xmm2, %xmm5
16-
; AVX512VL-NEXT: vinserti128 $1, %xmm5, %ymm4, %ymm4
17-
; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm5
18-
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[2,3,2,3,6,7,6,7]
19-
; AVX512VL-NEXT: vpsrlq $63, %ymm5, %ymm5
20-
; AVX512VL-NEXT: vpor %ymm5, %ymm4, %ymm4
21-
; AVX512VL-NEXT: vpsllq $1, %xmm0, %xmm5
22-
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[2,3,2,3]
23-
; AVX512VL-NEXT: vpsrlq $63, %xmm6, %xmm7
24-
; AVX512VL-NEXT: vpaddq %xmm1, %xmm1, %xmm8
25-
; AVX512VL-NEXT: vpor %xmm7, %xmm8, %xmm7
26-
; AVX512VL-NEXT: vinserti128 $1, %xmm7, %ymm5, %ymm5
27-
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
28-
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm5 = xmm3[2,3,2,3]
29-
; AVX512VL-NEXT: vpaddq %xmm5, %xmm5, %xmm5
30-
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm7 = xmm2[2,3,2,3]
31-
; AVX512VL-NEXT: vpaddq %xmm7, %xmm7, %xmm7
32-
; AVX512VL-NEXT: vinserti128 $1, %xmm7, %ymm5, %ymm5
33-
; AVX512VL-NEXT: vpaddq %xmm6, %xmm6, %xmm6
34-
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm7 = xmm1[2,3,2,3]
35-
; AVX512VL-NEXT: vpaddq %xmm7, %xmm7, %xmm7
36-
; AVX512VL-NEXT: vinserti128 $1, %xmm7, %ymm6, %ymm6
37-
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm6, %zmm5
3814
; AVX512VL-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
15+
; AVX512VL-NEXT: vpaddq %ymm2, %ymm2, %ymm4
16+
; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm3
17+
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[2,3,2,3,6,7,6,7]
18+
; AVX512VL-NEXT: vpsrlq $63, %ymm3, %ymm3
19+
; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
20+
; AVX512VL-NEXT: vpsllq $1, %xmm0, %xmm4
21+
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
22+
; AVX512VL-NEXT: vpsrlq $63, %xmm5, %xmm6
23+
; AVX512VL-NEXT: vpaddq %xmm1, %xmm1, %xmm7
24+
; AVX512VL-NEXT: vpor %xmm6, %xmm7, %xmm6
25+
; AVX512VL-NEXT: vinserti128 $1, %xmm6, %ymm4, %ymm4
26+
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
3927
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
4028
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
4129
; AVX512VL-NEXT: vpsrlq $63, %zmm0, %zmm0
42-
; AVX512VL-NEXT: vporq %zmm0, %zmm5, %zmm0
43-
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm4[0],zmm0[0],zmm4[2],zmm0[2],zmm4[4],zmm0[4],zmm4[6],zmm0[6]
30+
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
31+
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm5, %ymm1
32+
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[2,3,2,3,6,7,6,7]
33+
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
34+
; AVX512VL-NEXT: vpaddq %zmm1, %zmm1, %zmm1
35+
; AVX512VL-NEXT: vporq %zmm0, %zmm1, %zmm0
36+
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm3[0],zmm0[0],zmm3[2],zmm0[2],zmm3[4],zmm0[4],zmm3[6],zmm0[6]
4437
; AVX512VL-NEXT: retq
4538
;
4639
; AVX512VBMI-LABEL: shl_i512_1:

0 commit comments

Comments
 (0)