Skip to content

[X86] combineConcatVectorOps - add concatenation handling for X86ISD::VPERMILPV nodes #132355

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 21, 2025

Conversation

RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Mar 21, 2025

Concat the nodes if we can merge either of the operands for free.

…:VPERMILPV nodes

Concat the nodes if we can merge either of the operands for free.
@llvmbot
Copy link
Member

llvmbot commented Mar 21, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Concat the nodes if we can merge either of the operands for free.


Full diff: https://github.com/llvm/llvm-project/pull/132355.diff

3 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+11)
  • (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll (+2-3)
  • (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll (+13-17)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8287fb55f22ee..f6b5d4af5ba4e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58164,6 +58164,17 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
                            DAG.getTargetConstant(Idx, DL, MVT::i8));
       }
       break;
+    case X86ISD::VPERMILPV:
+      if (!IsSplat && (VT.is256BitVector() ||
+                       (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
+        SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
+        SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
+        if (Concat0 || Concat1)
+          return DAG.getNode(Opcode, DL, VT,
+                             Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
+                             Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
+      }
+      break;
     case X86ISD::PSHUFB:
     case X86ISD::PSADBW:
     case X86ISD::VPMADDUBSW:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index e86ebe63e1f69..2df013d0ff3e3 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -674,10 +674,9 @@ define <8 x i32> @concat_self_v8i32(<4 x i32> %x) {
 define <4 x double> @concat_vpermilvar_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1, <4 x i64> %m) {
 ; CHECK-LABEL: concat_vpermilvar_v4f64_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vextractf128 $1, %ymm2, %xmm3
-; CHECK-NEXT:    vpermilpd %xmm2, %xmm0, %xmm0
-; CHECK-NEXT:    vpermilpd %xmm3, %xmm1, %xmm1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpermilpd %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %m0 = shufflevector <4 x i64> %m, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
   %m1 = shufflevector <4 x i64> %m, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
index 6ffb3be1a7bc5..a28eba39685cb 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
@@ -985,29 +985,26 @@ define <8 x double> @concat_vpermilvar_v8f64_v2f64(<2 x double> %a0, <2 x double
 ; X86-NEXT:    movl %esp, %ebp
 ; X86-NEXT:    andl $-64, %esp
 ; X86-NEXT:    subl $64, %esp
-; X86-NEXT:    vmovapd 8(%ebp), %xmm3
-; X86-NEXT:    vpermilpd 72(%ebp), %xmm0, %xmm0
-; X86-NEXT:    vpermilpd 88(%ebp), %xmm1, %xmm1
-; X86-NEXT:    vpermilpd 104(%ebp), %xmm2, %xmm2
-; X86-NEXT:    vpermilpd 120(%ebp), %xmm3, %xmm3
-; X86-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X86-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; X86-NEXT:    vinsertf128 $1, 8(%ebp), %ymm2, %ymm1
+; X86-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; X86-NEXT:    vpermilpd 72(%ebp), %zmm0, %zmm0
 ; X86-NEXT:    movl %ebp, %esp
 ; X86-NEXT:    popl %ebp
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: concat_vpermilvar_v8f64_v2f64:
 ; X64:       # %bb.0:
-; X64-NEXT:    vextractf128 $1, %ymm4, %xmm5
-; X64-NEXT:    vextractf32x4 $2, %zmm4, %xmm6
-; X64-NEXT:    vextractf32x4 $3, %zmm4, %xmm7
-; X64-NEXT:    vpermilpd %xmm4, %xmm0, %xmm0
-; X64-NEXT:    vpermilpd %xmm5, %xmm1, %xmm1
-; X64-NEXT:    vpermilpd %xmm6, %xmm2, %xmm2
-; X64-NEXT:    vpermilpd %xmm7, %xmm3, %xmm3
+; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; X64-NEXT:    vextractf32x4 $2, %zmm4, %xmm5
+; X64-NEXT:    vextractf32x4 $3, %zmm4, %xmm6
+; X64-NEXT:    vpermilpd %xmm5, %xmm2, %xmm2
+; X64-NEXT:    vpermilpd %xmm6, %xmm3, %xmm3
 ; X64-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-NEXT:    vpermilpd %ymm4, %ymm0, %ymm0
 ; X64-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; X64-NEXT:    retq
   %m0 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
@@ -1027,10 +1024,9 @@ define <8 x double> @concat_vpermilvar_v8f64_v2f64(<2 x double> %a0, <2 x double
 define <8 x double> @concat_vpermilvar_v8f64_v4f64(<4 x double> %a0, <4 x double> %a1, <8 x i64> %m) nounwind {
 ; CHECK-LABEL: concat_vpermilvar_v8f64_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vextractf64x4 $1, %zmm2, %ymm3
-; CHECK-NEXT:    vpermilpd %ymm2, %ymm0, %ymm0
-; CHECK-NEXT:    vpermilpd %ymm3, %ymm1, %ymm1
+; CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; CHECK-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; CHECK-NEXT:    vpermilpd %zmm2, %zmm0, %zmm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %m0 = shufflevector <8 x i64> %m, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %m1 = shufflevector <8 x i64> %m, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>

@RKSimon RKSimon merged commit 5488ad8 into llvm:main Mar 21, 2025
13 checks passed
@RKSimon RKSimon deleted the x86-concat-vpermilv branch March 21, 2025 09:40
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants