-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[GlobalISel][AArch64] Legalize G_INSERT_VECTOR_ELT for SVE #114470
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[GlobalISel][AArch64] Legalize G_INSERT_VECTOR_ELT for SVE #114470
Conversation
There are patterns for:
* {nxv2s32, s32, s64}
* {nxv4s16, s16, s64}
* {nxv2s16, s16, s64}

For {nxv16s8, s32, s64} and {nxv8s16, s32, s64}, the element type and the vector element type differ, which makes LLVM-IR unhappy.
@llvm/pr-subscribers-backend-aarch64
Author: Thorsten Schütt (tschuett)
Changes
There are patterns for:
* {nxv2s32, s32, s64}
* {nxv4s16, s16, s64}
* {nxv2s16, s16, s64}

For {nxv16s8, s32, s64} and {nxv8s16, s32, s64}, the element type and the vector element type differ, which makes LLVM-IR unhappy.
Full diff: https://github.com/llvm/llvm-project/pull/114470.diff
5 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 6d71c150c8da6b..343a0172ff39ed 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -273,6 +273,11 @@ inline LegalityPredicate typeIsNot(unsigned TypeIdx, LLT Type) {
LegalityPredicate
typePairInSet(unsigned TypeIdx0, unsigned TypeIdx1,
std::initializer_list<std::pair<LLT, LLT>> TypesInit);
+/// True iff the given types for the given tuple of type indexes is one of the
+/// specified type tuple.
+LegalityPredicate
+typeTupleInSet(unsigned TypeIdx0, unsigned TypeIdx1, unsigned Type2,
+ std::initializer_list<std::tuple<LLT, LLT, LLT>> TypesInit);
/// True iff the given types for the given pair of type indexes is one of the
/// specified type pairs.
LegalityPredicate typePairAndMemDescInSet(
@@ -504,6 +509,15 @@ class LegalizeRuleSet {
using namespace LegalityPredicates;
return actionIf(Action, typePairInSet(typeIdx(0), typeIdx(1), Types));
}
+
+ LegalizeRuleSet &
+ actionFor(LegalizeAction Action,
+ std::initializer_list<std::tuple<LLT, LLT, LLT>> Types) {
+ using namespace LegalityPredicates;
+ return actionIf(Action,
+ typeTupleInSet(typeIdx(0), typeIdx(1), typeIdx(2), Types));
+ }
+
/// Use the given action when type indexes 0 and 1 is any type pair in the
/// given list.
/// Action should be an action that requires mutation.
@@ -615,6 +629,12 @@ class LegalizeRuleSet {
return *this;
return actionFor(LegalizeAction::Legal, Types);
}
+ LegalizeRuleSet &
+ legalFor(bool Pred, std::initializer_list<std::tuple<LLT, LLT, LLT>> Types) {
+ if (!Pred)
+ return *this;
+ return actionFor(LegalizeAction::Legal, Types);
+ }
/// The instruction is legal when type index 0 is any type in the given list
/// and imm index 0 is anything.
LegalizeRuleSet &legalForTypeWithAnyImm(std::initializer_list<LLT> Types) {
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 8fe48195c610be..dc7ed6cbe8b7da 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -49,6 +49,17 @@ LegalityPredicate LegalityPredicates::typePairInSet(
};
}
+LegalityPredicate LegalityPredicates::typeTupleInSet(
+ unsigned TypeIdx0, unsigned TypeIdx1, unsigned TypeIdx2,
+ std::initializer_list<std::tuple<LLT, LLT, LLT>> TypesInit) {
+ SmallVector<std::tuple<LLT, LLT, LLT>, 4> Types = TypesInit;
+ return [=](const LegalityQuery &Query) {
+ std::tuple<LLT, LLT, LLT> Match = {
+ Query.Types[TypeIdx0], Query.Types[TypeIdx1], Query.Types[TypeIdx2]};
+ return llvm::is_contained(Types, Match);
+ };
+}
+
LegalityPredicate LegalityPredicates::typePairAndMemDescInSet(
unsigned TypeIdx0, unsigned TypeIdx1, unsigned MMOIdx,
std::initializer_list<TypePairAndMemDesc> TypesAndMemDescInit) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 400024922124cd..31627060c9a6bc 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -978,6 +978,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
.legalIf(
typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
+ .legalFor(HasSVE, {{nxv4s32, s32, s64}, {nxv2s64, s64, s64}})
.moreElementsToNextPow2(0)
.widenVectorEltsToVectorMinSize(0, 64)
.clampNumElements(0, v8s8, v16s8)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index b40fe55fdfaf67..2bcfdc1b46873b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -426,6 +426,9 @@ void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
LLT EltTy = MRI.getType(Insert.getElementReg());
LLT IdxTy = MRI.getType(Insert.getIndexReg());
+ if (VecTy.isScalableVector())
+ return;
+
// Create a stack slot and store the vector into it
MachineFunction &MF = Builder.getMF();
Align Alignment(
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
index 0481d997d24faf..b7cc04fa6edc58 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64 -mattr=+sve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for extract_v4i32_vector_insert
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v4i32_vector_insert_const
@@ -143,11 +143,12 @@ define i64 @extract_v2i64_extract_build_vector_opaque(<2 x i64> %a, i32 %c) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
-; CHECK-SD-NEXT: adrp x8, .LCPI8_0
+; CHECK-SD-NEXT: mov x8, #-31 // =0xffffffffffffffe1
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
+; CHECK-SD-NEXT: index z0.d, #0, x8
; CHECK-SD-NEXT: mov x8, sp
; CHECK-SD-NEXT: bfi x8, x0, #3, #1
+; CHECK-SD-NEXT: add z0.d, z0.d, #42 // =0x2a
; CHECK-SD-NEXT: str q0, [sp]
; CHECK-SD-NEXT: ldr x0, [x8]
; CHECK-SD-NEXT: add sp, sp, #16
@@ -1043,11 +1044,10 @@ entry:
define i32 @extract_v4i32_phi(i64 %val, i32 %limit, ptr %ptr) {
; CHECK-SD-LABEL: extract_v4i32_phi:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: dup v1.2s, w0
-; CHECK-SD-NEXT: adrp x8, .LCPI41_0
+; CHECK-SD-NEXT: index z1.s, #1, #1
+; CHECK-SD-NEXT: dup v2.2s, w0
; CHECK-SD-NEXT: movi v0.2s, #16
-; CHECK-SD-NEXT: ldr d2, [x8, :lo12:.LCPI41_0]
-; CHECK-SD-NEXT: add v1.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT: add v1.2s, v2.2s, v1.2s
; CHECK-SD-NEXT: .LBB41_1: // %loop
; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-SD-NEXT: fmov w8, s1
@@ -1100,4 +1100,72 @@ ret:
ret i32 %3
}
+define <vscale x 2 x i64> @insert_vscale_2_i64_zero(<vscale x 2 x i64> %vec, i64 %elt) {
+; CHECK-SD-LABEL: insert_vscale_2_i64_zero:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ptrue p0.d, vl1
+; CHECK-SD-NEXT: mov z0.d, p0/m, x0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: insert_vscale_2_i64_zero:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov x8, xzr
+; CHECK-GI-NEXT: index z1.d, #0, #1
+; CHECK-GI-NEXT: ptrue p0.d
+; CHECK-GI-NEXT: mov z2.d, x8
+; CHECK-GI-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
+; CHECK-GI-NEXT: mov z0.d, p0/m, x0
+; CHECK-GI-NEXT: ret
+entry:
+ %d = insertelement <vscale x 2 x i64> %vec, i64 %elt, i64 0
+ ret <vscale x 2 x i64> %d
+}
+
+define <vscale x 2 x i64> @insert_vscale_2_i64(<vscale x 2 x i64> %vec, i64 %elt, i64 %idx) {
+; CHECK-LABEL: insert_vscale_2_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: index z1.d, #0, #1
+; CHECK-NEXT: mov z2.d, x1
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: mov z0.d, p0/m, x0
+; CHECK-NEXT: ret
+entry:
+ %d = insertelement <vscale x 2 x i64> %vec, i64 %elt, i64 %idx
+ ret <vscale x 2 x i64> %d
+}
+define <vscale x 4 x i32> @insert_vscale_4_i32_zero(<vscale x 4 x i32> %vec, i32 %elt) {
+; CHECK-SD-LABEL: insert_vscale_4_i32_zero:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ptrue p0.s, vl1
+; CHECK-SD-NEXT: mov z0.s, p0/m, w0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: insert_vscale_4_i32_zero:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, wzr
+; CHECK-GI-NEXT: index z1.s, #0, #1
+; CHECK-GI-NEXT: ptrue p0.s
+; CHECK-GI-NEXT: mov z2.s, w8
+; CHECK-GI-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-GI-NEXT: mov z0.s, p0/m, w0
+; CHECK-GI-NEXT: ret
+entry:
+ %d = insertelement <vscale x 4 x i32> %vec, i32 %elt, i64 0
+ ret <vscale x 4 x i32> %d
+}
+
+define <vscale x 4 x i32> @insert_vscale_4_i32(<vscale x 4 x i32> %vec, i32 %elt, i64 %idx) {
+; CHECK-LABEL: insert_vscale_4_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: index z1.s, #0, #1
+; CHECK-NEXT: mov z2.s, w1
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: mov z0.s, p0/m, w0
+; CHECK-NEXT: ret
+entry:
+ %d = insertelement <vscale x 4 x i32> %vec, i32 %elt, i64 %idx
+ ret <vscale x 4 x i32> %d
+}
|
@llvm/pr-subscribers-llvm-globalisel
Author: Thorsten Schütt (tschuett)
Changes
There are patterns for:
* {nxv2s32, s32, s64}
* {nxv4s16, s16, s64}
* {nxv2s16, s16, s64}

For {nxv16s8, s32, s64} and {nxv8s16, s32, s64}, the element type and the vector element type differ, which makes LLVM-IR unhappy.
Full diff: https://github.com/llvm/llvm-project/pull/114470.diff
5 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 6d71c150c8da6b..343a0172ff39ed 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -273,6 +273,11 @@ inline LegalityPredicate typeIsNot(unsigned TypeIdx, LLT Type) {
LegalityPredicate
typePairInSet(unsigned TypeIdx0, unsigned TypeIdx1,
std::initializer_list<std::pair<LLT, LLT>> TypesInit);
+/// True iff the given types for the given tuple of type indexes is one of the
+/// specified type tuple.
+LegalityPredicate
+typeTupleInSet(unsigned TypeIdx0, unsigned TypeIdx1, unsigned Type2,
+ std::initializer_list<std::tuple<LLT, LLT, LLT>> TypesInit);
/// True iff the given types for the given pair of type indexes is one of the
/// specified type pairs.
LegalityPredicate typePairAndMemDescInSet(
@@ -504,6 +509,15 @@ class LegalizeRuleSet {
using namespace LegalityPredicates;
return actionIf(Action, typePairInSet(typeIdx(0), typeIdx(1), Types));
}
+
+ LegalizeRuleSet &
+ actionFor(LegalizeAction Action,
+ std::initializer_list<std::tuple<LLT, LLT, LLT>> Types) {
+ using namespace LegalityPredicates;
+ return actionIf(Action,
+ typeTupleInSet(typeIdx(0), typeIdx(1), typeIdx(2), Types));
+ }
+
/// Use the given action when type indexes 0 and 1 is any type pair in the
/// given list.
/// Action should be an action that requires mutation.
@@ -615,6 +629,12 @@ class LegalizeRuleSet {
return *this;
return actionFor(LegalizeAction::Legal, Types);
}
+ LegalizeRuleSet &
+ legalFor(bool Pred, std::initializer_list<std::tuple<LLT, LLT, LLT>> Types) {
+ if (!Pred)
+ return *this;
+ return actionFor(LegalizeAction::Legal, Types);
+ }
/// The instruction is legal when type index 0 is any type in the given list
/// and imm index 0 is anything.
LegalizeRuleSet &legalForTypeWithAnyImm(std::initializer_list<LLT> Types) {
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 8fe48195c610be..dc7ed6cbe8b7da 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -49,6 +49,17 @@ LegalityPredicate LegalityPredicates::typePairInSet(
};
}
+LegalityPredicate LegalityPredicates::typeTupleInSet(
+ unsigned TypeIdx0, unsigned TypeIdx1, unsigned TypeIdx2,
+ std::initializer_list<std::tuple<LLT, LLT, LLT>> TypesInit) {
+ SmallVector<std::tuple<LLT, LLT, LLT>, 4> Types = TypesInit;
+ return [=](const LegalityQuery &Query) {
+ std::tuple<LLT, LLT, LLT> Match = {
+ Query.Types[TypeIdx0], Query.Types[TypeIdx1], Query.Types[TypeIdx2]};
+ return llvm::is_contained(Types, Match);
+ };
+}
+
LegalityPredicate LegalityPredicates::typePairAndMemDescInSet(
unsigned TypeIdx0, unsigned TypeIdx1, unsigned MMOIdx,
std::initializer_list<TypePairAndMemDesc> TypesAndMemDescInit) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 400024922124cd..31627060c9a6bc 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -978,6 +978,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
.legalIf(
typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
+ .legalFor(HasSVE, {{nxv4s32, s32, s64}, {nxv2s64, s64, s64}})
.moreElementsToNextPow2(0)
.widenVectorEltsToVectorMinSize(0, 64)
.clampNumElements(0, v8s8, v16s8)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index b40fe55fdfaf67..2bcfdc1b46873b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -426,6 +426,9 @@ void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
LLT EltTy = MRI.getType(Insert.getElementReg());
LLT IdxTy = MRI.getType(Insert.getIndexReg());
+ if (VecTy.isScalableVector())
+ return;
+
// Create a stack slot and store the vector into it
MachineFunction &MF = Builder.getMF();
Align Alignment(
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
index 0481d997d24faf..b7cc04fa6edc58 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64 -mattr=+sve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for extract_v4i32_vector_insert
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v4i32_vector_insert_const
@@ -143,11 +143,12 @@ define i64 @extract_v2i64_extract_build_vector_opaque(<2 x i64> %a, i32 %c) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
-; CHECK-SD-NEXT: adrp x8, .LCPI8_0
+; CHECK-SD-NEXT: mov x8, #-31 // =0xffffffffffffffe1
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
+; CHECK-SD-NEXT: index z0.d, #0, x8
; CHECK-SD-NEXT: mov x8, sp
; CHECK-SD-NEXT: bfi x8, x0, #3, #1
+; CHECK-SD-NEXT: add z0.d, z0.d, #42 // =0x2a
; CHECK-SD-NEXT: str q0, [sp]
; CHECK-SD-NEXT: ldr x0, [x8]
; CHECK-SD-NEXT: add sp, sp, #16
@@ -1043,11 +1044,10 @@ entry:
define i32 @extract_v4i32_phi(i64 %val, i32 %limit, ptr %ptr) {
; CHECK-SD-LABEL: extract_v4i32_phi:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: dup v1.2s, w0
-; CHECK-SD-NEXT: adrp x8, .LCPI41_0
+; CHECK-SD-NEXT: index z1.s, #1, #1
+; CHECK-SD-NEXT: dup v2.2s, w0
; CHECK-SD-NEXT: movi v0.2s, #16
-; CHECK-SD-NEXT: ldr d2, [x8, :lo12:.LCPI41_0]
-; CHECK-SD-NEXT: add v1.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT: add v1.2s, v2.2s, v1.2s
; CHECK-SD-NEXT: .LBB41_1: // %loop
; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-SD-NEXT: fmov w8, s1
@@ -1100,4 +1100,72 @@ ret:
ret i32 %3
}
+define <vscale x 2 x i64> @insert_vscale_2_i64_zero(<vscale x 2 x i64> %vec, i64 %elt) {
+; CHECK-SD-LABEL: insert_vscale_2_i64_zero:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ptrue p0.d, vl1
+; CHECK-SD-NEXT: mov z0.d, p0/m, x0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: insert_vscale_2_i64_zero:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov x8, xzr
+; CHECK-GI-NEXT: index z1.d, #0, #1
+; CHECK-GI-NEXT: ptrue p0.d
+; CHECK-GI-NEXT: mov z2.d, x8
+; CHECK-GI-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
+; CHECK-GI-NEXT: mov z0.d, p0/m, x0
+; CHECK-GI-NEXT: ret
+entry:
+ %d = insertelement <vscale x 2 x i64> %vec, i64 %elt, i64 0
+ ret <vscale x 2 x i64> %d
+}
+
+define <vscale x 2 x i64> @insert_vscale_2_i64(<vscale x 2 x i64> %vec, i64 %elt, i64 %idx) {
+; CHECK-LABEL: insert_vscale_2_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: index z1.d, #0, #1
+; CHECK-NEXT: mov z2.d, x1
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: mov z0.d, p0/m, x0
+; CHECK-NEXT: ret
+entry:
+ %d = insertelement <vscale x 2 x i64> %vec, i64 %elt, i64 %idx
+ ret <vscale x 2 x i64> %d
+}
+define <vscale x 4 x i32> @insert_vscale_4_i32_zero(<vscale x 4 x i32> %vec, i32 %elt) {
+; CHECK-SD-LABEL: insert_vscale_4_i32_zero:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ptrue p0.s, vl1
+; CHECK-SD-NEXT: mov z0.s, p0/m, w0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: insert_vscale_4_i32_zero:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, wzr
+; CHECK-GI-NEXT: index z1.s, #0, #1
+; CHECK-GI-NEXT: ptrue p0.s
+; CHECK-GI-NEXT: mov z2.s, w8
+; CHECK-GI-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-GI-NEXT: mov z0.s, p0/m, w0
+; CHECK-GI-NEXT: ret
+entry:
+ %d = insertelement <vscale x 4 x i32> %vec, i32 %elt, i64 0
+ ret <vscale x 4 x i32> %d
+}
+
+define <vscale x 4 x i32> @insert_vscale_4_i32(<vscale x 4 x i32> %vec, i32 %elt, i64 %idx) {
+; CHECK-LABEL: insert_vscale_4_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: index z1.s, #0, #1
+; CHECK-NEXT: mov z2.s, w1
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: mov z0.s, p0/m, w0
+; CHECK-NEXT: ret
+entry:
+ %d = insertelement <vscale x 4 x i32> %vec, i32 %elt, i64 %idx
+ ret <vscale x 4 x i32> %d
+}
|
The previous PR had only MIR tests, which hid the LLVM-IR issue and made the Windows CI unhappy. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm with nit
@@ -1099,5 +1099,3 @@ loop: | |||
ret: | |||
ret i32 %3 | |||
} | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Leftover whitespace change
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Trunk has no whitespace:
https://github.com/llvm/llvm-project/blob/main/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
There are patterns for: * {nxv2s32, s32, s64}, * {nxv4s16, s16, s64}, * {nxv2s16, s16, s64}
There are patterns for: * {nxv2s32, s32, s64}, * {nxv4s16, s16, s64}, * {nxv2s16, s16, s64}
There are patterns for: