
[X86] combineConcatVectorOps - add concatenation handling for X86ISD::VSHLD/VSHRD funnel shift nodes #132915


Merged
RKSimon merged 1 commit into llvm:main from x86-concat-vshld on Mar 25, 2025

Conversation

@RKSimon (Collaborator) commented Mar 25, 2025

Concat the nodes if we can merge either of the operands for free.
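
(Not part of the PR text: a minimal standalone C++ model of why the fold is legal, assuming VPSHLDQ's per-lane semantics of a 64-bit funnel shift. Because VSHLD/VSHRD operate independently on each lane, concatenating two narrower results is equivalent to emitting one wider node on the concatenated operands; the names below, such as `fshl64` and `vshld`, are illustrative helpers, not LLVM APIs.)

```cpp
// Minimal standalone model (not LLVM code) of per-lane VSHLD semantics.
#include <cassert>
#include <cstdint>
#include <vector>

// vpshldq lane: top 64 bits of (Hi:Lo) << Amt, i.e. fshl(Hi, Lo, Amt).
static uint64_t fshl64(uint64_t Hi, uint64_t Lo, unsigned Amt) {
  Amt &= 63;
  return Amt ? (Hi << Amt) | (Lo >> (64 - Amt)) : Hi;
}

// Apply the funnel shift independently to every 64-bit lane.
static std::vector<uint64_t> vshld(const std::vector<uint64_t> &A,
                                   const std::vector<uint64_t> &B,
                                   unsigned Amt) {
  std::vector<uint64_t> R(A.size());
  for (size_t I = 0; I != A.size(); ++I)
    R[I] = fshl64(A[I], B[I], Amt);
  return R;
}

static std::vector<uint64_t> concat(std::vector<uint64_t> Lo,
                                    const std::vector<uint64_t> &Hi) {
  Lo.insert(Lo.end(), Hi.begin(), Hi.end());
  return Lo;
}

int main() {
  std::vector<uint64_t> A0{1, 2}, A1{3, 4}, B0{~0ULL, 5}, B1{6, 7};
  // Concatenating two narrow results matches one wide op on concatenated inputs.
  assert(concat(vshld(A0, B0, 1), vshld(A1, B1, 1)) ==
         vshld(concat(A0, A1), concat(B0, B1), 1));
  return 0;
}
```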

[X86] combineConcatVectorOps - add concatenation handling for X86ISD::VSHLD/VSHRD funnel shift nodes

Concat the nodes if we can merge either of the operands for free.
@llvmbot (Member) commented Mar 25, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Concat the nodes if we can merge either of the operands for free.


Full diff: https://github.com/llvm/llvm-project/pull/132915.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+2)
  • (modified) llvm/test/CodeGen/X86/shift-i512.ll (+41-59)
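
(Not part of the PR text: the updated shift-i512.ll checks below use vpshldq $63 to implement the 1-bit logical/arithmetic right shifts. A small standalone check of that word-level identity, using the same illustrative fshl model as above, shows why the $63 immediate appears there.)

```cpp
// Standalone check (not LLVM code): a 1-bit right shift across 64-bit words
// equals a funnel-shift-left by 63 with the next-higher word, which is why
// the lshr/ashr tests below use vpshldq $63.
#include <cassert>
#include <cstdint>

static uint64_t fshl64(uint64_t Hi, uint64_t Lo, unsigned Amt) {
  Amt &= 63;
  return Amt ? (Hi << Amt) | (Lo >> (64 - Amt)) : Hi;
}

int main() {
  uint64_t Lo = 0x0123456789abcdefULL, Hi = 0xfedcba9876543210ULL;
  // Low word of (Hi:Lo) >> 1 equals fshl(Hi, Lo, 63).
  assert(((Lo >> 1) | (Hi << 63)) == fshl64(Hi, Lo, 63));
  return 0;
}
```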
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 74ce6f4efb1cf..ca7d8f847e382 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58572,6 +58572,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
               Concat1 ? Concat1 : ConcatSubOperand(SrcVT, Ops, 1));
       }
       break;
+    case X86ISD::VSHLD:
+    case X86ISD::VSHRD:
     case X86ISD::PALIGNR:
       if (!IsSplat &&
           ((VT.is256BitVector() && Subtarget.hasInt256()) ||
diff --git a/llvm/test/CodeGen/X86/shift-i512.ll b/llvm/test/CodeGen/X86/shift-i512.ll
index 6b210cd6166d0..f7dd1dc0949f5 100644
--- a/llvm/test/CodeGen/X86/shift-i512.ll
+++ b/llvm/test/CodeGen/X86/shift-i512.ll
@@ -30,28 +30,20 @@ define <8 x i64> @shl_i512_1(<8 x i64> %a)  {
 ;
 ; AVX512VBMI-LABEL: shl_i512_1:
 ; AVX512VBMI:       # %bb.0:
-; AVX512VBMI-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512VBMI-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
-; AVX512VBMI-NEXT:    vextracti32x4 $3, %zmm0, %xmm3
-; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
-; AVX512VBMI-NEXT:    vpshldq $1, %xmm3, %xmm4, %xmm4
-; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
-; AVX512VBMI-NEXT:    vpshldq $1, %xmm2, %xmm5, %xmm6
-; AVX512VBMI-NEXT:    vinserti128 $1, %xmm4, %ymm6, %ymm4
-; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm6 = xmm1[2,3,2,3]
-; AVX512VBMI-NEXT:    vpshldq $1, %xmm1, %xmm6, %xmm7
-; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
-; AVX512VBMI-NEXT:    vpshldq $1, %xmm0, %xmm8, %xmm9
-; AVX512VBMI-NEXT:    vinserti128 $1, %xmm7, %ymm9, %ymm7
-; AVX512VBMI-NEXT:    vinserti64x4 $1, %ymm4, %zmm7, %zmm4
-; AVX512VBMI-NEXT:    vpshldq $1, %xmm8, %xmm1, %xmm1
-; AVX512VBMI-NEXT:    vpsllq $1, %xmm0, %xmm0
-; AVX512VBMI-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512VBMI-NEXT:    vpshldq $1, %xmm5, %xmm3, %xmm1
-; AVX512VBMI-NEXT:    vpshldq $1, %xmm6, %xmm2, %xmm2
+; AVX512VBMI-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
+; AVX512VBMI-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
+; AVX512VBMI-NEXT:    vpshldq $1, %xmm3, %xmm2, %xmm3
+; AVX512VBMI-NEXT:    vpsllq $1, %xmm0, %xmm4
+; AVX512VBMI-NEXT:    vinserti128 $1, %xmm3, %ymm4, %ymm3
 ; AVX512VBMI-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX512VBMI-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512VBMI-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm4[0],zmm0[2],zmm4[2],zmm0[4],zmm4[4],zmm0[6],zmm4[6]
+; AVX512VBMI-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
+; AVX512VBMI-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
+; AVX512VBMI-NEXT:    vpshldq $1, %ymm1, %ymm2, %ymm1
+; AVX512VBMI-NEXT:    vinserti64x4 $1, %ymm1, %zmm3, %zmm1
+; AVX512VBMI-NEXT:    vpshufd {{.*#+}} zmm2 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512VBMI-NEXT:    vpshldq $1, %zmm0, %zmm2, %zmm0
+; AVX512VBMI-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm0[0],zmm1[2],zmm0[2],zmm1[4],zmm0[4],zmm1[6],zmm0[6]
 ; AVX512VBMI-NEXT:    retq
 ;
 ; ZNVER4-LABEL: shl_i512_1:
@@ -131,27 +123,22 @@ define <8 x i64> @lshr_i512_1(<8 x i64> %a)  {
 ;
 ; AVX512VBMI-LABEL: lshr_i512_1:
 ; AVX512VBMI:       # %bb.0:
-; AVX512VBMI-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VBMI-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
 ; AVX512VBMI-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
-; AVX512VBMI-NEXT:    vextracti32x4 $3, %zmm0, %xmm3
+; AVX512VBMI-NEXT:    vextracti128 $1, %ymm0, %xmm3
 ; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
-; AVX512VBMI-NEXT:    vpshldq $63, %xmm3, %xmm4, %xmm5
-; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
-; AVX512VBMI-NEXT:    vpshldq $63, %xmm2, %xmm6, %xmm7
-; AVX512VBMI-NEXT:    vinserti128 $1, %xmm5, %ymm7, %ymm5
-; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm7 = xmm1[2,3,2,3]
-; AVX512VBMI-NEXT:    vpshldq $63, %xmm1, %xmm7, %xmm8
-; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
-; AVX512VBMI-NEXT:    vpshldq $63, %xmm0, %xmm9, %xmm0
-; AVX512VBMI-NEXT:    vinserti128 $1, %xmm8, %ymm0, %ymm0
-; AVX512VBMI-NEXT:    vinserti64x4 $1, %ymm5, %zmm0, %zmm0
-; AVX512VBMI-NEXT:    vpshldq $63, %xmm7, %xmm2, %xmm2
-; AVX512VBMI-NEXT:    vpshldq $63, %xmm9, %xmm1, %xmm1
-; AVX512VBMI-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
-; AVX512VBMI-NEXT:    vpshldq $63, %xmm6, %xmm3, %xmm2
-; AVX512VBMI-NEXT:    vpsrlq $1, %xmm4, %xmm3
-; AVX512VBMI-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
-; AVX512VBMI-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512VBMI-NEXT:    vpshldq $63, %xmm4, %xmm2, %xmm4
+; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
+; AVX512VBMI-NEXT:    vpshldq $63, %xmm5, %xmm3, %xmm3
+; AVX512VBMI-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; AVX512VBMI-NEXT:    vpshldq $63, %xmm2, %xmm1, %xmm2
+; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; AVX512VBMI-NEXT:    vpsrlq $1, %xmm1, %xmm1
+; AVX512VBMI-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
+; AVX512VBMI-NEXT:    vinserti64x4 $1, %ymm1, %zmm3, %zmm1
+; AVX512VBMI-NEXT:    vpshufd {{.*#+}} zmm2 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512VBMI-NEXT:    vpshldq $63, %zmm0, %zmm2, %zmm0
 ; AVX512VBMI-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
 ; AVX512VBMI-NEXT:    retq
 ;
@@ -251,27 +238,22 @@ define <8 x i64> @ashr_i512_1(<8 x i64> %a)  {
 ;
 ; AVX512VBMI-LABEL: ashr_i512_1:
 ; AVX512VBMI:       # %bb.0:
-; AVX512VBMI-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VBMI-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
 ; AVX512VBMI-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
-; AVX512VBMI-NEXT:    vextracti32x4 $3, %zmm0, %xmm3
+; AVX512VBMI-NEXT:    vextracti128 $1, %ymm0, %xmm3
 ; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
-; AVX512VBMI-NEXT:    vpshldq $63, %xmm3, %xmm4, %xmm5
-; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
-; AVX512VBMI-NEXT:    vpshldq $63, %xmm2, %xmm6, %xmm7
-; AVX512VBMI-NEXT:    vinserti128 $1, %xmm5, %ymm7, %ymm5
-; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm7 = xmm1[2,3,2,3]
-; AVX512VBMI-NEXT:    vpshldq $63, %xmm1, %xmm7, %xmm8
-; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
-; AVX512VBMI-NEXT:    vpshldq $63, %xmm0, %xmm9, %xmm0
-; AVX512VBMI-NEXT:    vinserti128 $1, %xmm8, %ymm0, %ymm0
-; AVX512VBMI-NEXT:    vinserti64x4 $1, %ymm5, %zmm0, %zmm0
-; AVX512VBMI-NEXT:    vpshldq $63, %xmm7, %xmm2, %xmm2
-; AVX512VBMI-NEXT:    vpshldq $63, %xmm9, %xmm1, %xmm1
-; AVX512VBMI-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
-; AVX512VBMI-NEXT:    vpshldq $63, %xmm6, %xmm3, %xmm2
-; AVX512VBMI-NEXT:    vpsraq $1, %xmm4, %xmm3
-; AVX512VBMI-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
-; AVX512VBMI-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512VBMI-NEXT:    vpshldq $63, %xmm4, %xmm2, %xmm4
+; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
+; AVX512VBMI-NEXT:    vpshldq $63, %xmm5, %xmm3, %xmm3
+; AVX512VBMI-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; AVX512VBMI-NEXT:    vpshldq $63, %xmm2, %xmm1, %xmm2
+; AVX512VBMI-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; AVX512VBMI-NEXT:    vpsraq $1, %xmm1, %xmm1
+; AVX512VBMI-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
+; AVX512VBMI-NEXT:    vinserti64x4 $1, %ymm1, %zmm3, %zmm1
+; AVX512VBMI-NEXT:    vpshufd {{.*#+}} zmm2 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512VBMI-NEXT:    vpshldq $63, %zmm0, %zmm2, %zmm0
 ; AVX512VBMI-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
 ; AVX512VBMI-NEXT:    retq
 ;

@RKSimon merged commit 449e3fa into llvm:main Mar 25, 2025
13 checks passed
@RKSimon deleted the x86-concat-vshld branch March 25, 2025 13:23