Skip to content

[GlobalISel] Add support for interleave and deinterleave intrinsics to IRTranslator #85199

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 15, 2024

Conversation

dc03-work
Copy link
Contributor

This patch adds support for the @llvm.experimental.vector.{interleave2, deinterleave2} intrinsics to IRTranslator for fixed-width vector types. They are lowered to vector shuffles, in roughly the same manner as SelectionDAG.

@llvmbot
Copy link
Member

llvmbot commented Mar 14, 2024

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-globalisel

Author: Dhruv Chawla (dc03-work)

Changes

This patch adds support for the @llvm.experimental.vector.{interleave2, deinterleave2} intrinsics to IRTranslator for fixed-width vector types. They are lowered to vector shuffles, in roughly the same manner as SelectionDAG.


Full diff: https://github.com/llvm/llvm-project/pull/85199.diff

4 Files Affected:

  • (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+44)
  • (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll (+56-25)
  • (modified) llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll (+54-25)
  • (modified) llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll (+33-18)
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 94fdb37e283bb4..9db77adf0aaae2 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
@@ -2474,6 +2475,49 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
 
     return true;
   }
+
+  case Intrinsic::experimental_vector_interleave2: {
+    Value *Src0 = CI.getOperand(0);
+    Value *Src1 = CI.getOperand(1);
+
+    // Canonicalize fixed-width vector types to G_SHUFFLE_VECTOR
+    // (similar to SelectionDAG)
+    LLT OpType = getLLTForType(*Src0->getType(), MIRBuilder.getDataLayout());
+    if (!OpType.isFixedVector())
+      break;
+
+    Register Op0 = getOrCreateVReg(*Src0);
+    Register Op1 = getOrCreateVReg(*Src1);
+    Register Res = getOrCreateVReg(CI);
+
+    MIRBuilder.buildShuffleVector(
+        Res, Op0, Op1, createInterleaveMask(OpType.getNumElements(), 2));
+
+    return true;
+  }
+
+  case Intrinsic::experimental_vector_deinterleave2: {
+    Value *Src = CI.getOperand(0);
+
+    // Canonicalize fixed-width vector types to shuffles that extract
+    // sub-vectors (similar to SelectionDAG)
+    ArrayRef<Register> Res = getOrCreateVRegs(CI);
+    LLT ResTy = MRI->getType(Res[0]);
+    if (!ResTy.isFixedVector())
+      break;
+
+    Register Op = getOrCreateVReg(*Src);
+    LLT OpType = getLLTForType(*Src->getType(), MIRBuilder.getDataLayout());
+
+    auto Undef = MIRBuilder.buildUndef(OpType);
+    MIRBuilder.buildShuffleVector(
+        Res[0], Op, Undef, createStrideMask(0, 2, ResTy.getNumElements()));
+    MIRBuilder.buildShuffleVector(
+        Res[1], Op, Undef, createStrideMask(1, 2, ResTy.getNumElements()));
+
+    return true;
+  }
+
 #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)  \
   case Intrinsic::INTRINSIC:
 #include "llvm/IR/ConstrainedOps.def"
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
index 7b8448de2331b4..7cdb10e7159f03 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
@@ -1,23 +1,42 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
-; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s
-; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16,+sve -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 target triple = "aarch64"
 
+; CHECK-GI:      warning: Instruction selection used fallback path for complex_add_v16f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for complex_add_v32f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for complex_add_v16f16_with_intrinsic
+
 ; Expected to not transform
 define <2 x half> @complex_add_v2f16(<2 x half> %a, <2 x half> %b) {
-; CHECK-LABEL: complex_add_v2f16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov h2, v0.h[1]
-; CHECK-NEXT:    mov h3, v1.h[1]
-; CHECK-NEXT:    fsub h1, h1, h2
-; CHECK-NEXT:    fadd h0, h3, h0
-; CHECK-NEXT:    mov v1.h[1], v0.h[0]
-; CHECK-NEXT:    fmov d0, d1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: complex_add_v2f16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    mov h2, v0.h[1]
+; CHECK-SD-NEXT:    mov h3, v1.h[1]
+; CHECK-SD-NEXT:    fsub h1, h1, h2
+; CHECK-SD-NEXT:    fadd h0, h3, h0
+; CHECK-SD-NEXT:    mov v1.h[1], v0.h[0]
+; CHECK-SD-NEXT:    fmov d0, d1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: complex_add_v2f16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-NEXT:    fsub h1, h1, h2
+; CHECK-GI-NEXT:    fadd h0, h3, h0
+; CHECK-GI-NEXT:    mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT:    fmov d0, d1
+; CHECK-GI-NEXT:    ret
 entry:
   %a.real = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 0>
   %a.imag = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 1>
@@ -162,17 +181,29 @@ entry:
 
 ; Expected not to transform as it is integer
 define <16 x i16> @complex_add_v16i16(<16 x i16> %a, <16 x i16> %b) {
-; CHECK-LABEL: complex_add_v16i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    uzp1 v4.8h, v2.8h, v3.8h
-; CHECK-NEXT:    uzp1 v5.8h, v0.8h, v1.8h
-; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    uzp2 v1.8h, v2.8h, v3.8h
-; CHECK-NEXT:    sub v2.8h, v4.8h, v0.8h
-; CHECK-NEXT:    add v1.8h, v1.8h, v5.8h
-; CHECK-NEXT:    zip1 v0.8h, v2.8h, v1.8h
-; CHECK-NEXT:    zip2 v1.8h, v2.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: complex_add_v16i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    uzp1 v4.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT:    uzp1 v5.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    uzp2 v1.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT:    sub v2.8h, v4.8h, v0.8h
+; CHECK-SD-NEXT:    add v1.8h, v1.8h, v5.8h
+; CHECK-SD-NEXT:    zip1 v0.8h, v2.8h, v1.8h
+; CHECK-SD-NEXT:    zip2 v1.8h, v2.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: complex_add_v16i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    uzp1 v4.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    uzp2 v2.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT:    sub v1.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    add v2.8h, v2.8h, v4.8h
+; CHECK-GI-NEXT:    zip1 v0.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT:    zip2 v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %a.real = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   %a.imag = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
index 1b1cfead0f97ac..2ad5623b655176 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
@@ -1,29 +1,50 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec) {
-; CHECK-LABEL: vector_deinterleave_v2f16_v4f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    dup v2.2s, v0.s[1]
-; CHECK-NEXT:    mov v1.16b, v2.16b
-; CHECK-NEXT:    mov v1.h[0], v0.h[1]
-; CHECK-NEXT:    mov v0.h[1], v2.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: vector_deinterleave_v2f16_v4f16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    dup v2.2s, v0.s[1]
+; CHECK-SD-NEXT:    mov v1.16b, v2.16b
+; CHECK-SD-NEXT:    mov v1.h[0], v0.h[1]
+; CHECK-SD-NEXT:    mov v0.h[1], v2.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vector_deinterleave_v2f16_v4f16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    uzp1 v2.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT:    uzp2 v1.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT:    mov h0, v2.h[1]
+; CHECK-GI-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-NEXT:    mov v2.h[1], v0.h[0]
+; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT:    fmov d0, d2
+; CHECK-GI-NEXT:    ret
   %retval = call {<2 x half>, <2 x half>} @llvm.experimental.vector.deinterleave2.v4f16(<4 x half> %vec)
   ret {<2 x half>, <2 x half>}   %retval
 }
 
 define {<4 x half>, <4 x half>} @vector_deinterleave_v4f16_v8f16(<8 x half> %vec) {
-; CHECK-LABEL: vector_deinterleave_v4f16_v8f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    uzp1 v2.4h, v0.4h, v1.4h
-; CHECK-NEXT:    uzp2 v1.4h, v0.4h, v1.4h
-; CHECK-NEXT:    fmov d0, d2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: vector_deinterleave_v4f16_v8f16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    uzp1 v2.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    uzp2 v1.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    fmov d0, d2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vector_deinterleave_v4f16_v8f16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    uzp1 v2.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    uzp2 v1.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT:    fmov d0, d2
+; CHECK-GI-NEXT:    ret
   %retval = call {<4 x half>, <4 x half>} @llvm.experimental.vector.deinterleave2.v8f16(<8 x half> %vec)
   ret {<4 x half>, <4 x half>}   %retval
 }
@@ -40,13 +61,21 @@ define {<8 x half>, <8 x half>} @vector_deinterleave_v8f16_v16f16(<16 x half> %v
 }
 
 define {<2 x float>, <2 x float>} @vector_deinterleave_v2f32_v4f32(<4 x float> %vec) {
-; CHECK-LABEL: vector_deinterleave_v2f32_v4f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    zip1 v2.2s, v0.2s, v1.2s
-; CHECK-NEXT:    zip2 v1.2s, v0.2s, v1.2s
-; CHECK-NEXT:    fmov d0, d2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: vector_deinterleave_v2f32_v4f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    zip1 v2.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT:    zip2 v1.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT:    fmov d0, d2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vector_deinterleave_v2f32_v4f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    uzp1 v2.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp2 v1.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT:    fmov d0, d2
+; CHECK-GI-NEXT:    ret
   %retval = call {<2 x float>, <2 x float>} @llvm.experimental.vector.deinterleave2.v4f32(<4 x float> %vec)
   ret {<2 x float>, <2 x float>}   %retval
 }
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
index 071c1ffdbb45dc..eb81aff33e4963 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define <4 x half> @interleave2_v4f16(<2 x half> %vec0, <2 x half> %vec1) {
 ; CHECK-LABEL: interleave2_v4f16:
@@ -11,15 +12,22 @@ define <4 x half> @interleave2_v4f16(<2 x half> %vec0, <2 x half> %vec1) {
 }
 
 define <8 x half> @interleave2_v8f16(<4 x half> %vec0, <4 x half> %vec1) {
-; CHECK-LABEL: interleave2_v8f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    adrp x8, .LCPI1_0
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: interleave2_v8f16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    adrp x8, .LCPI1_0
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: interleave2_v8f16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    zip1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
   %retval = call <8 x half> @llvm.experimental.vector.interleave2.v8f16(<4 x half> %vec0, <4 x half> %vec1)
   ret <8 x half> %retval
 }
@@ -36,14 +44,21 @@ define <16 x half> @interleave2_v16f16(<8 x half> %vec0, <8 x half> %vec1) {
 }
 
 define <4 x float> @interleave2_v4f32(<2 x float> %vec0, <2 x float> %vec1) {
-; CHECK-LABEL: interleave2_v4f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    rev64 v1.4s, v0.4s
-; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: interleave2_v4f32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    rev64 v1.4s, v0.4s
+; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: interleave2_v4f32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    zip1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    ret
   %retval = call <4 x float> @llvm.experimental.vector.interleave2.v4f32(<2 x float> %vec0, <2 x float> %vec1)
   ret <4 x float> %retval
 }


case Intrinsic::experimental_vector_interleave2: {
Value *Src0 = CI.getOperand(0);
Value *Src1 = CI.getOperand(1);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can move these definitions below and early exit based on OpType.isFixedVector()

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

MIRBuilder.buildShuffleVector(
Res, Op0, Op1, createInterleaveMask(OpType.getNumElements(), 2));

return true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ideally, I would like to outline this code to a function and call based on the condition to be met. This would look a lot cleaner.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

call based on the condition to be met.

I'm not entirely sure what you mean by this, can you please clarify?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have tried refactoring the code, is it okay now?

MIRBuilder.buildShuffleVector(
Res[1], Op, Undef, createStrideMask(1, 2, ResTy.getNumElements()));

return true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment for moving definition of Src below and outlining.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

Comment on lines +2 to +7
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16,+sve -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think too much is changing in this test that's only tangentially related to the change. Can you add a dedicated IRTranslator test for this, like others in test/CodeGen/AArch64/GlobalISel/irtranslator-*?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added two test files for this.

Copy link
Collaborator

@qcolombet qcolombet left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@qcolombet
Copy link
Collaborator

LGTM

But fix @madhur13490 concerns first, of couse!

@dc03-work dc03-work requested review from madhur13490 and arsenm March 14, 2024 11:51
Copy link
Contributor

@madhur13490 madhur13490 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Nit: Please have an assert in both functions for the expected intrinsic.

…o IRTranslator

This patch adds support for the @llvm.experimental.vector.{interleave2,
deinterleave2} intrinsics to IRTranslator for fixed-width vector types.
They are lowered to vector shuffles, in roughly the same manner as
SelectionDAG.
@dc03-work dc03-work merged commit a7f3d17 into llvm:main Mar 15, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants