Skip to content

Commit 58a335a

Browse files
committed
[X86] Fold concat_vectors(permq(x),permq(x)) -> permq(concat_vectors(x,x))
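Handle a common repeated-subvector shuffle pattern in combineConcatVectorOps: when every concatenated operand is the same X86ISD::VPERMI (permq) node, concatenate the permute's source with itself and apply the permute once to the wider vector. The fold is only applied when AVX512 registers are in use and the permute's source is not a foldable load.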
1 parent 4444a7e commit 58a335a

File tree

3 files changed

+45
-43
lines changed

3 files changed

+45
-43
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54572,6 +54572,14 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5457254572
Op0.getValueType() == cast<MemSDNode>(SrcVec)->getMemoryVT())
5457354573
return Op0.getOperand(0);
5457454574
}
54575+
54576+
// concat_vectors(permq(x),permq(x)) -> permq(concat_vectors(x,x))
54577+
if (Op0.getOpcode() == X86ISD::VPERMI && Subtarget.useAVX512Regs() &&
54578+
!X86::mayFoldLoad(Op0.getOperand(0), Subtarget))
54579+
return DAG.getNode(Op0.getOpcode(), DL, VT,
54580+
DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
54581+
Op0.getOperand(0), Op0.getOperand(0)),
54582+
Op0.getOperand(1));
5457554583
}
5457654584

5457754585
// concat(extract_subvector(v0,c0), extract_subvector(v1,c1)) -> vperm2x128.

llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -725,19 +725,17 @@ define void @store_i8_stride7_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp
725725
; AVX512BW-SLOW-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero
726726
; AVX512BW-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
727727
; AVX512BW-SLOW-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
728-
; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,2,1,3]
729-
; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
730-
; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zero,zero,zmm1[18],zero,zero,zero,zero,zero,zero,zmm1[19],zero,zero,zero,zero,zmm1[36,44],zero,zero,zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46,54],zero,zero,zero,zero,zero,zero,zmm1[55],zero,zero,zero,zero,zero,zero,zero,zero
731-
; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,0,2]
732728
; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
729+
; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm1 = zmm0[0,2,1,3,4,6,5,7]
730+
; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zero,zero,zmm1[18],zero,zero,zero,zero,zero,zero,zmm1[19],zero,zero,zero,zero,zmm1[36,44],zero,zero,zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46,54],zero,zero,zero,zero,zero,zero,zmm1[55],zero,zero,zero,zero,zero,zero,zero,zero
731+
; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,3,0,2,5,7,4,6]
733732
; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zero,zero,zero,zero,zmm0[0],zero,zero,zero,zero,zero,zero,zmm0[1],zero,zero,zero,zero,zmm0[18,26],zero,zero,zero,zero,zero,zmm0[19,27],zero,zero,zero,zero,zero,zero,zero,zmm0[36],zero,zero,zero,zero,zero,zero,zmm0[37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero
734733
; AVX512BW-SLOW-NEXT: vporq %zmm1, %zmm0, %zmm0
735734
; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm1
736-
; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm1 = zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zmm1[2,10,18,26],zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46],zero,zero,zero,zero,zero,zero,zero,zmm1[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
737-
; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,3,0,1]
738-
; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
739-
; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm2 = zero,zero,zmm2[0,8],zero,zero,zero,zero,zero,zmm2[1,9],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm2[19,27],zero,zero,zero,zero,zero,zmm2[20,28],zero,zero,zero,zero,zero,zero,zero,zmm2[37,45],zero,zero,zero,zero,zero,zmm2[38,46],zero,zero,zero,zmm2[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
740-
; AVX512BW-SLOW-NEXT: vporq %zmm1, %zmm2, %zmm1
735+
; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm2 = zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zmm1[2,10,18,26],zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46],zero,zero,zero,zero,zero,zero,zero,zmm1[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
736+
; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5]
737+
; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm1 = zero,zero,zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zero,zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46],zero,zero,zero,zmm1[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
738+
; AVX512BW-SLOW-NEXT: vporq %zmm2, %zmm1, %zmm1
741739
; AVX512BW-SLOW-NEXT: movabsq $63546854584629360, %rcx # imm = 0xE1C3870E1C3870
742740
; AVX512BW-SLOW-NEXT: kmovq %rcx, %k1
743741
; AVX512BW-SLOW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}

llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll

Lines changed: 30 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -5684,10 +5684,9 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
56845684
;
56855685
; AVX512BW-LABEL: vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32:
56865686
; AVX512BW: # %bb.0:
5687-
; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
5688-
; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
5689-
; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
5690-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
5687+
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
5688+
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
5689+
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
56915690
; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero
56925691
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
56935692
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -5797,10 +5796,9 @@ define void @vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16(ptr %in
57975796
;
57985797
; AVX512BW-LABEL: vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16:
57995798
; AVX512BW: # %bb.0:
5800-
; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
5801-
; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
5802-
; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
5803-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
5799+
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
5800+
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
5801+
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
58045802
; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zero,zero,zmm0[0],zero,zero,zero,zmm0[0],zero,zero,zero,zmm0[0],zero,zero,zero,zmm0[16],zero,zero,zero,zmm0[16],zero,zero,zero,zmm0[16],zero,zero,zero,zmm0[16],zero,zero,zero,zmm0[32],zero,zero,zero,zmm0[32],zero,zero,zero,zmm0[32],zero,zero,zero,zmm0[32],zero,zero,zero,zmm0[48],zero,zero,zero,zmm0[48],zero,zero,zero,zmm0[48],zero,zero,zero,zmm0[48],zero,zero,zero
58055803
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
58065804
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -5910,10 +5908,9 @@ define void @vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8(ptr %in.v
59105908
;
59115909
; AVX512BW-LABEL: vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8:
59125910
; AVX512BW: # %bb.0:
5913-
; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
5914-
; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
5915-
; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
5916-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
5911+
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
5912+
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
5913+
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
59175914
; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zero,zero,zero,zero,zero,zero,zmm0[0],zero,zero,zero,zero,zero,zero,zero,zmm0[16],zero,zero,zero,zero,zero,zero,zero,zmm0[16],zero,zero,zero,zero,zero,zero,zero,zmm0[32],zero,zero,zero,zero,zero,zero,zero,zmm0[32],zero,zero,zero,zero,zero,zero,zero,zmm0[48],zero,zero,zero,zero,zero,zero,zero,zmm0[48],zero,zero,zero,zero,zero,zero,zero
59185915
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
59195916
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -6004,10 +6001,9 @@ define void @vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4(ptr %i
60046001
;
60056002
; AVX512BW-LABEL: vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4:
60066003
; AVX512BW: # %bb.0:
6007-
; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
6008-
; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
6009-
; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
6010-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
6004+
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
6005+
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
6006+
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
60116007
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
60126008
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
60136009
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -6211,12 +6207,12 @@ define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i
62116207
;
62126208
; AVX512BW-LABEL: vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16:
62136209
; AVX512BW: # %bb.0:
6214-
; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
6215-
; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
6216-
; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
6217-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
6218-
; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero
6219-
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
6210+
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
6211+
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
6212+
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
6213+
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,33,0,35,0,37,0,39,0,41,0,43,0,45,0,47,0,49,0,51,0,53,0,55,0,57,0,59,0,61,0,63]
6214+
; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
6215+
; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
62206216
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
62216217
; AVX512BW-NEXT: vzeroupper
62226218
; AVX512BW-NEXT: retq
@@ -6330,12 +6326,12 @@ define void @vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8(ptr %in.
63306326
;
63316327
; AVX512BW-LABEL: vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8:
63326328
; AVX512BW: # %bb.0:
6333-
; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
6334-
; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
6335-
; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
6336-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
6337-
; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,1],zero,zero,zero,zero,zero,zero,zmm0[0,1],zero,zero,zero,zero,zero,zero,zmm0[16,17],zero,zero,zero,zero,zero,zero,zmm0[16,17],zero,zero,zero,zero,zero,zero,zmm0[32,33],zero,zero,zero,zero,zero,zero,zmm0[32,33],zero,zero,zero,zero,zero,zero,zmm0[48,49],zero,zero,zero,zero,zero,zero,zmm0[48,49],zero,zero,zero,zero,zero,zero
6338-
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
6329+
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
6330+
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
6331+
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
6332+
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,33,34,35,0,37,38,39,0,41,42,43,0,45,46,47,0,49,50,51,0,53,54,55,0,57,58,59,0,61,62,63]
6333+
; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
6334+
; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
63396335
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
63406336
; AVX512BW-NEXT: vzeroupper
63416337
; AVX512BW-NEXT: retq
@@ -6449,12 +6445,12 @@ define void @vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4(ptr %i
64496445
;
64506446
; AVX512BW-LABEL: vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4:
64516447
; AVX512BW: # %bb.0:
6452-
; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
6453-
; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
6454-
; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
6455-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
6456-
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
6457-
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
6448+
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
6449+
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
6450+
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
6451+
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,33,34,35,36,37,38,39,0,41,42,43,44,45,46,47,0,49,50,51,52,53,54,55,0,57,58,59,60,61,62,63]
6452+
; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
6453+
; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
64586454
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
64596455
; AVX512BW-NEXT: vzeroupper
64606456
; AVX512BW-NEXT: retq

0 commit comments

Comments
 (0)