-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[GlobalISel] Add support for interleave and deinterleave intrinsics to IRTranslator #85199
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel Author: Dhruv Chawla (dc03-work) Changes: This patch adds support for the @llvm.experimental.vector.{interleave2, deinterleave2} intrinsics to IRTranslator for fixed-width vector types. They are lowered to vector shuffles, in roughly the same manner as SelectionDAG. Full diff: https://github.com/llvm/llvm-project/pull/85199.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 94fdb37e283bb4..9db77adf0aaae2 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
@@ -2474,6 +2475,49 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
+
+ case Intrinsic::experimental_vector_interleave2: {
+ Value *Src0 = CI.getOperand(0);
+ Value *Src1 = CI.getOperand(1);
+
+ // Canonicalize fixed-width vector types to G_SHUFFLE_VECTOR
+ // (similar to SelectionDAG)
+ LLT OpType = getLLTForType(*Src0->getType(), MIRBuilder.getDataLayout());
+ if (!OpType.isFixedVector())
+ break;
+
+ Register Op0 = getOrCreateVReg(*Src0);
+ Register Op1 = getOrCreateVReg(*Src1);
+ Register Res = getOrCreateVReg(CI);
+
+ MIRBuilder.buildShuffleVector(
+ Res, Op0, Op1, createInterleaveMask(OpType.getNumElements(), 2));
+
+ return true;
+ }
+
+ case Intrinsic::experimental_vector_deinterleave2: {
+ Value *Src = CI.getOperand(0);
+
+ // Canonicalize fixed-width vector types to shuffles that extract
+ // sub-vectors (similar to SelectionDAG)
+ ArrayRef<Register> Res = getOrCreateVRegs(CI);
+ LLT ResTy = MRI->getType(Res[0]);
+ if (!ResTy.isFixedVector())
+ break;
+
+ Register Op = getOrCreateVReg(*Src);
+ LLT OpType = getLLTForType(*Src->getType(), MIRBuilder.getDataLayout());
+
+ auto Undef = MIRBuilder.buildUndef(OpType);
+ MIRBuilder.buildShuffleVector(
+ Res[0], Op, Undef, createStrideMask(0, 2, ResTy.getNumElements()));
+ MIRBuilder.buildShuffleVector(
+ Res[1], Op, Undef, createStrideMask(1, 2, ResTy.getNumElements()));
+
+ return true;
+ }
+
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
index 7b8448de2331b4..7cdb10e7159f03 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
@@ -1,23 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
-; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s
-; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16,+sve -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
target triple = "aarch64"
+; CHECK-GI: warning: Instruction selection used fallback path for complex_add_v16f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for complex_add_v32f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for complex_add_v16f16_with_intrinsic
+
; Expected to not transform
define <2 x half> @complex_add_v2f16(<2 x half> %a, <2 x half> %b) {
-; CHECK-LABEL: complex_add_v2f16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h2, v0.h[1]
-; CHECK-NEXT: mov h3, v1.h[1]
-; CHECK-NEXT: fsub h1, h1, h2
-; CHECK-NEXT: fadd h0, h3, h0
-; CHECK-NEXT: mov v1.h[1], v0.h[0]
-; CHECK-NEXT: fmov d0, d1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: complex_add_v2f16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: mov h2, v0.h[1]
+; CHECK-SD-NEXT: mov h3, v1.h[1]
+; CHECK-SD-NEXT: fsub h1, h1, h2
+; CHECK-SD-NEXT: fadd h0, h3, h0
+; CHECK-SD-NEXT: mov v1.h[1], v0.h[0]
+; CHECK-SD-NEXT: fmov d0, d1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: complex_add_v2f16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NEXT: mov h3, v1.h[1]
+; CHECK-GI-NEXT: fsub h1, h1, h2
+; CHECK-GI-NEXT: fadd h0, h3, h0
+; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT: fmov d0, d1
+; CHECK-GI-NEXT: ret
entry:
%a.real = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 0>
%a.imag = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 1>
@@ -162,17 +181,29 @@ entry:
; Expected not to transform as it is integer
define <16 x i16> @complex_add_v16i16(<16 x i16> %a, <16 x i16> %b) {
-; CHECK-LABEL: complex_add_v16i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uzp1 v4.8h, v2.8h, v3.8h
-; CHECK-NEXT: uzp1 v5.8h, v0.8h, v1.8h
-; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: uzp2 v1.8h, v2.8h, v3.8h
-; CHECK-NEXT: sub v2.8h, v4.8h, v0.8h
-; CHECK-NEXT: add v1.8h, v1.8h, v5.8h
-; CHECK-NEXT: zip1 v0.8h, v2.8h, v1.8h
-; CHECK-NEXT: zip2 v1.8h, v2.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: complex_add_v16i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: uzp1 v4.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT: uzp1 v5.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: uzp2 v1.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT: sub v2.8h, v4.8h, v0.8h
+; CHECK-SD-NEXT: add v1.8h, v1.8h, v5.8h
+; CHECK-SD-NEXT: zip1 v0.8h, v2.8h, v1.8h
+; CHECK-SD-NEXT: zip2 v1.8h, v2.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: complex_add_v16i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: uzp1 v4.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT: uzp2 v2.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT: sub v1.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: add v2.8h, v2.8h, v4.8h
+; CHECK-GI-NEXT: zip1 v0.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: zip2 v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: ret
entry:
%a.real = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%a.imag = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
index 1b1cfead0f97ac..2ad5623b655176 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
@@ -1,29 +1,50 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec) {
-; CHECK-LABEL: vector_deinterleave_v2f16_v4f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: dup v2.2s, v0.s[1]
-; CHECK-NEXT: mov v1.16b, v2.16b
-; CHECK-NEXT: mov v1.h[0], v0.h[1]
-; CHECK-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vector_deinterleave_v2f16_v4f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: dup v2.2s, v0.s[1]
+; CHECK-SD-NEXT: mov v1.16b, v2.16b
+; CHECK-SD-NEXT: mov v1.h[0], v0.h[1]
+; CHECK-SD-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vector_deinterleave_v2f16_v4f16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uzp1 v2.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT: uzp2 v1.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT: mov h0, v2.h[1]
+; CHECK-GI-NEXT: mov h3, v1.h[1]
+; CHECK-GI-NEXT: mov v2.h[1], v0.h[0]
+; CHECK-GI-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT: fmov d0, d2
+; CHECK-GI-NEXT: ret
%retval = call {<2 x half>, <2 x half>} @llvm.experimental.vector.deinterleave2.v4f16(<4 x half> %vec)
ret {<2 x half>, <2 x half>} %retval
}
define {<4 x half>, <4 x half>} @vector_deinterleave_v4f16_v8f16(<8 x half> %vec) {
-; CHECK-LABEL: vector_deinterleave_v4f16_v8f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: uzp1 v2.4h, v0.4h, v1.4h
-; CHECK-NEXT: uzp2 v1.4h, v0.4h, v1.4h
-; CHECK-NEXT: fmov d0, d2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vector_deinterleave_v4f16_v8f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: uzp1 v2.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: uzp2 v1.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: fmov d0, d2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vector_deinterleave_v4f16_v8f16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uzp1 v2.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT: uzp2 v1.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT: fmov d0, d2
+; CHECK-GI-NEXT: ret
%retval = call {<4 x half>, <4 x half>} @llvm.experimental.vector.deinterleave2.v8f16(<8 x half> %vec)
ret {<4 x half>, <4 x half>} %retval
}
@@ -40,13 +61,21 @@ define {<8 x half>, <8 x half>} @vector_deinterleave_v8f16_v16f16(<16 x half> %v
}
define {<2 x float>, <2 x float>} @vector_deinterleave_v2f32_v4f32(<4 x float> %vec) {
-; CHECK-LABEL: vector_deinterleave_v2f32_v4f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: zip1 v2.2s, v0.2s, v1.2s
-; CHECK-NEXT: zip2 v1.2s, v0.2s, v1.2s
-; CHECK-NEXT: fmov d0, d2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vector_deinterleave_v2f32_v4f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: zip1 v2.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: zip2 v1.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: fmov d0, d2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vector_deinterleave_v2f32_v4f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uzp1 v2.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp2 v1.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT: fmov d0, d2
+; CHECK-GI-NEXT: ret
%retval = call {<2 x float>, <2 x float>} @llvm.experimental.vector.deinterleave2.v4f32(<4 x float> %vec)
ret {<2 x float>, <2 x float>} %retval
}
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
index 071c1ffdbb45dc..eb81aff33e4963 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <4 x half> @interleave2_v4f16(<2 x half> %vec0, <2 x half> %vec1) {
; CHECK-LABEL: interleave2_v4f16:
@@ -11,15 +12,22 @@ define <4 x half> @interleave2_v4f16(<2 x half> %vec0, <2 x half> %vec1) {
}
define <8 x half> @interleave2_v8f16(<4 x half> %vec0, <4 x half> %vec1) {
-; CHECK-LABEL: interleave2_v8f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: adrp x8, .LCPI1_0
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: interleave2_v8f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: adrp x8, .LCPI1_0
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
+; CHECK-SD-NEXT: tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: interleave2_v8f16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: zip1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%retval = call <8 x half> @llvm.experimental.vector.interleave2.v8f16(<4 x half> %vec0, <4 x half> %vec1)
ret <8 x half> %retval
}
@@ -36,14 +44,21 @@ define <16 x half> @interleave2_v16f16(<8 x half> %vec0, <8 x half> %vec1) {
}
define <4 x float> @interleave2_v4f32(<2 x float> %vec0, <2 x float> %vec1) {
-; CHECK-LABEL: interleave2_v4f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: rev64 v1.4s, v0.4s
-; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: interleave2_v4f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: rev64 v1.4s, v0.4s
+; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: interleave2_v4f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: zip1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: ret
%retval = call <4 x float> @llvm.experimental.vector.interleave2.v4f32(<2 x float> %vec0, <2 x float> %vec1)
ret <4 x float> %retval
}
|
|
||
case Intrinsic::experimental_vector_interleave2: { | ||
Value *Src0 = CI.getOperand(0); | ||
Value *Src1 = CI.getOperand(1); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can move these definitions below and early exit based on OpType.isFixedVector()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
MIRBuilder.buildShuffleVector( | ||
Res, Op0, Op1, createInterleaveMask(OpType.getNumElements(), 2)); | ||
|
||
return true; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ideally, I would like to outline this code into a function and call based on the condition to be met. This would look a lot cleaner.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
call based on the condition to be met.
I'm not entirely sure what you mean by this, can you please clarify?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have tried refactoring the code, is it okay now?
MIRBuilder.buildShuffleVector( | ||
Res[1], Op, Undef, createStrideMask(1, 2, ResTy.getNumElements())); | ||
|
||
return true; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same comment for moving the definition of Src below and outlining.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD | ||
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD | ||
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD | ||
; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI | ||
; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16,+sve -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI | ||
; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think too much is changing in this test that's only tangentially related to the change. Can you add a dedicated IRTranslator test for this, like others in test/CodeGen/AArch64/GlobalISel/irtranslator-*?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree with this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have added two test files for this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
But fix @madhur13490's concerns first, of course!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Nit: Please have an assert in both functions for the expected intrinsic.
[GlobalISel] Add support for interleave and deinterleave intrinsics to IRTranslator. This patch adds support for the @llvm.experimental.vector.{interleave2, deinterleave2} intrinsics to IRTranslator for fixed-width vector types. They are lowered to vector shuffles, in roughly the same manner as SelectionDAG.
a2d6d98
to
b6984ad
Compare
This patch adds support for the @llvm.experimental.vector.{interleave2, deinterleave2} intrinsics to IRTranslator for fixed-width vector types. They are lowered to vector shuffles, in roughly the same manner as SelectionDAG.