Skip to content

Commit eed7c5e

Browse files
committed
[X86] Peek through bitcasts to find more opportunities for the VPERMV3 -> VPERMV combine
A follow-up to #96414.
1 parent d9f1166 commit eed7c5e

File tree

4 files changed

+19
-22
lines changed

4 files changed

+19
-22
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41336,8 +41336,9 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4133641336
case X86ISD::VPERMV3: {
4133741337
// VPERM[I,T]2[B,W] are 3 uops on Skylake and Icelake so we try to use
4133841338
// VPERMV.
41339-
SDValue V1 = N.getOperand(0);
41340-
SDValue V2 = N.getOperand(2);
41339+
SDValue V1 = peekThroughBitcasts(N.getOperand(0));
41340+
SDValue V2 = peekThroughBitcasts(N.getOperand(2));
41341+
MVT SVT = V1.getSimpleValueType();
4134141342
MVT EVT = VT.getVectorElementType();
4134241343
MVT NVT = VT.getDoubleNumVectorElementsVT();
4134341344
if ((EVT == MVT::i8 || EVT == MVT::i16) &&
@@ -41346,14 +41347,15 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4134641347
V1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4134741348
V1.getConstantOperandVal(1) == 0 &&
4134841349
V2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
41349-
V2.getConstantOperandVal(1) == VT.getVectorNumElements() &&
41350+
V2.getConstantOperandVal(1) == SVT.getVectorNumElements() &&
4135041351
V1.getOperand(0) == V2.getOperand(0)) {
4135141352
SDValue Mask =
4135241353
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NVT, DAG.getUNDEF(NVT),
4135341354
N.getOperand(1), DAG.getIntPtrConstant(0, DL));
4135441355
return DAG.getNode(
4135541356
ISD::EXTRACT_SUBVECTOR, DL, VT,
41356-
DAG.getNode(X86ISD::VPERMV, DL, NVT, Mask, V1.getOperand(0)),
41357+
DAG.getNode(X86ISD::VPERMV, DL, NVT, Mask,
41358+
DAG.getBitcast(NVT, V1.getOperand(0))),
4135741359
DAG.getIntPtrConstant(0, DL));
4135841360
}
4135941361

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1638,10 +1638,9 @@ define void @vec256_i16_widen_to_i32_factor2_broadcast_to_v8i32_factor8(ptr %in.
16381638
; AVX512BW: # %bb.0:
16391639
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
16401640
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
1641-
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1642-
; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31]
1643-
; AVX512BW-NEXT: vpermi2w %ymm1, %ymm0, %ymm2
1644-
; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
1641+
; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31]
1642+
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
1643+
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
16451644
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
16461645
; AVX512BW-NEXT: vzeroupper
16471646
; AVX512BW-NEXT: retq
@@ -3539,11 +3538,10 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
35393538
; AVX512BW: # %bb.0:
35403539
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
35413540
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
3542-
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
3543-
; AVX512BW-NEXT: vpmovsxbw {{.*#+}} xmm2 = [0,25,0,27,0,29,0,31]
3544-
; AVX512BW-NEXT: vpermi2w %ymm1, %ymm0, %ymm2
3541+
; AVX512BW-NEXT: vpmovsxbw {{.*#+}} xmm1 = [0,25,0,27,0,29,0,31]
3542+
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm1
35453543
; AVX512BW-NEXT: vpbroadcastw %xmm0, %ymm0
3546-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
3544+
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
35473545
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
35483546
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
35493547
; AVX512BW-NEXT: vzeroupper
@@ -3672,11 +3670,10 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
36723670
; AVX512BW: # %bb.0:
36733671
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
36743672
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
3675-
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
3676-
; AVX512BW-NEXT: vpmovsxbw {{.*#+}} xmm2 = [0,25,26,0,28,29,0,31]
3677-
; AVX512BW-NEXT: vpermi2w %ymm1, %ymm0, %ymm2
3673+
; AVX512BW-NEXT: vpmovsxbw {{.*#+}} xmm1 = [0,25,26,0,28,29,0,31]
3674+
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm1
36783675
; AVX512BW-NEXT: vpbroadcastw %xmm0, %xmm0
3679-
; AVX512BW-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm0
3676+
; AVX512BW-NEXT: vinserti32x4 $2, %xmm0, %zmm1, %zmm0
36803677
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
36813678
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
36823679
; AVX512BW-NEXT: vzeroupper

llvm/test/CodeGen/X86/shuffle-vs-trunc-128.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -856,8 +856,7 @@ define <16 x i8> @oddelts_v32i16_shuffle_v16i16_to_v16i8(<32 x i16> %n2) nounwin
856856
; AVX512VBMI-LABEL: oddelts_v32i16_shuffle_v16i16_to_v16i8:
857857
; AVX512VBMI: # %bb.0:
858858
; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [2,6,10,14,18,22,26,30,34,38,42,46,50,54,58,62]
859-
; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm2
860-
; AVX512VBMI-NEXT: vpermt2b %ymm2, %ymm1, %ymm0
859+
; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0
861860
; AVX512VBMI-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
862861
; AVX512VBMI-NEXT: vzeroupper
863862
; AVX512VBMI-NEXT: retq

llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1638,10 +1638,9 @@ define void @vec256_i16_widen_to_i32_factor2_broadcast_to_v8i32_factor8(ptr %in.
16381638
; AVX512BW: # %bb.0:
16391639
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
16401640
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
1641-
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1642-
; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31]
1643-
; AVX512BW-NEXT: vpermi2w %ymm1, %ymm0, %ymm2
1644-
; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
1641+
; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31]
1642+
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
1643+
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
16451644
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
16461645
; AVX512BW-NEXT: vzeroupper
16471646
; AVX512BW-NEXT: retq

0 commit comments

Comments
 (0)