Skip to content

Commit 8e37727

Browse files
author
Thorsten Schütt
authored
[GlobalISel][AArch64] Legalize G_INSERT_VECTOR_ELT for SVE (#114470)
There are patterns for: {nxv2s32, s32, s64}, {nxv4s16, s16, s64}, and {nxv2s16, s16, s64}.
1 parent f16bff1 commit 8e37727

File tree

6 files changed

+162
-3
lines changed

6 files changed

+162
-3
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,11 @@ inline LegalityPredicate typeIsNot(unsigned TypeIdx, LLT Type) {
273273
LegalityPredicate
274274
typePairInSet(unsigned TypeIdx0, unsigned TypeIdx1,
275275
std::initializer_list<std::pair<LLT, LLT>> TypesInit);
276+
/// True iff the types at the given tuple of type indexes are one of the
277+
/// specified type tuples.
278+
LegalityPredicate
279+
typeTupleInSet(unsigned TypeIdx0, unsigned TypeIdx1, unsigned Type2,
280+
std::initializer_list<std::tuple<LLT, LLT, LLT>> TypesInit);
276281
/// True iff the given types for the given pair of type indexes is one of the
277282
/// specified type pairs.
278283
LegalityPredicate typePairAndMemDescInSet(
@@ -504,6 +509,15 @@ class LegalizeRuleSet {
504509
using namespace LegalityPredicates;
505510
return actionIf(Action, typePairInSet(typeIdx(0), typeIdx(1), Types));
506511
}
512+
513+
LegalizeRuleSet &
514+
actionFor(LegalizeAction Action,
515+
std::initializer_list<std::tuple<LLT, LLT, LLT>> Types) {
516+
using namespace LegalityPredicates;
517+
return actionIf(Action,
518+
typeTupleInSet(typeIdx(0), typeIdx(1), typeIdx(2), Types));
519+
}
520+
507521
/// Use the given action when type indexes 0 and 1 are any type pair in the
508522
/// given list.
509523
/// Action should be an action that requires mutation.
@@ -615,6 +629,12 @@ class LegalizeRuleSet {
615629
return *this;
616630
return actionFor(LegalizeAction::Legal, Types);
617631
}
632+
LegalizeRuleSet &
633+
legalFor(bool Pred, std::initializer_list<std::tuple<LLT, LLT, LLT>> Types) {
634+
if (!Pred)
635+
return *this;
636+
return actionFor(LegalizeAction::Legal, Types);
637+
}
618638
/// The instruction is legal when type index 0 is any type in the given list
619639
/// and imm index 0 is anything.
620640
LegalizeRuleSet &legalForTypeWithAnyImm(std::initializer_list<LLT> Types) {

llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,17 @@ LegalityPredicate LegalityPredicates::typePairInSet(
4949
};
5050
}
5151

52+
LegalityPredicate LegalityPredicates::typeTupleInSet(
53+
unsigned TypeIdx0, unsigned TypeIdx1, unsigned TypeIdx2,
54+
std::initializer_list<std::tuple<LLT, LLT, LLT>> TypesInit) {
55+
SmallVector<std::tuple<LLT, LLT, LLT>, 4> Types = TypesInit;
56+
return [=](const LegalityQuery &Query) {
57+
std::tuple<LLT, LLT, LLT> Match = {
58+
Query.Types[TypeIdx0], Query.Types[TypeIdx1], Query.Types[TypeIdx2]};
59+
return llvm::is_contained(Types, Match);
60+
};
61+
}
62+
5263
LegalityPredicate LegalityPredicates::typePairAndMemDescInSet(
5364
unsigned TypeIdx0, unsigned TypeIdx1, unsigned MMOIdx,
5465
std::initializer_list<TypePairAndMemDesc> TypesAndMemDescInit) {
@@ -202,7 +213,7 @@ LegalityPredicate LegalityPredicates::memSizeNotByteSizePow2(unsigned MMOIdx) {
202213
LegalityPredicate LegalityPredicates::numElementsNotPow2(unsigned TypeIdx) {
203214
return [=](const LegalityQuery &Query) {
204215
const LLT QueryTy = Query.Types[TypeIdx];
205-
return QueryTy.isVector() && !isPowerOf2_32(QueryTy.getNumElements());
216+
return QueryTy.isFixedVector() && !isPowerOf2_32(QueryTy.getNumElements());
206217
};
207218
}
208219

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -978,6 +978,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
978978
getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
979979
.legalIf(
980980
typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
981+
.legalFor(HasSVE, {{nxv16s8, s32, s64},
982+
{nxv8s16, s32, s64},
983+
{nxv4s32, s32, s64},
984+
{nxv2s64, s64, s64}})
981985
.moreElementsToNextPow2(0)
982986
.widenVectorEltsToVectorMinSize(0, 64)
983987
.clampNumElements(0, v8s8, v16s8)

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,9 @@ void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
426426
LLT EltTy = MRI.getType(Insert.getElementReg());
427427
LLT IdxTy = MRI.getType(Insert.getIndexReg());
428428

429+
if (VecTy.isScalableVector())
430+
return;
431+
429432
// Create a stack slot and store the vector into it
430433
MachineFunction &MF = Builder.getMF();
431434
Align Alignment(
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=aarch64 -mattr=+sve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+
; RUN: llc -mtriple=aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4+
5+
define <vscale x 2 x i64> @insert_vscale_2_i64_zero(<vscale x 2 x i64> %vec, i64 %elt) {
6+
; CHECK-SD-LABEL: insert_vscale_2_i64_zero:
7+
; CHECK-SD: // %bb.0: // %entry
8+
; CHECK-SD-NEXT: ptrue p0.d, vl1
9+
; CHECK-SD-NEXT: mov z0.d, p0/m, x0
10+
; CHECK-SD-NEXT: ret
11+
;
12+
; CHECK-GI-LABEL: insert_vscale_2_i64_zero:
13+
; CHECK-GI: // %bb.0: // %entry
14+
; CHECK-GI-NEXT: mov x8, xzr
15+
; CHECK-GI-NEXT: index z1.d, #0, #1
16+
; CHECK-GI-NEXT: ptrue p0.d
17+
; CHECK-GI-NEXT: mov z2.d, x8
18+
; CHECK-GI-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
19+
; CHECK-GI-NEXT: mov z0.d, p0/m, x0
20+
; CHECK-GI-NEXT: ret
21+
entry:
22+
%d = insertelement <vscale x 2 x i64> %vec, i64 %elt, i64 0
23+
ret <vscale x 2 x i64> %d
24+
}
25+
26+
define <vscale x 2 x i64> @insert_vscale_2_i64(<vscale x 2 x i64> %vec, i64 %elt, i64 %idx) {
27+
; CHECK-LABEL: insert_vscale_2_i64:
28+
; CHECK: // %bb.0: // %entry
29+
; CHECK-NEXT: index z1.d, #0, #1
30+
; CHECK-NEXT: mov z2.d, x1
31+
; CHECK-NEXT: ptrue p0.d
32+
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
33+
; CHECK-NEXT: mov z0.d, p0/m, x0
34+
; CHECK-NEXT: ret
35+
entry:
36+
%d = insertelement <vscale x 2 x i64> %vec, i64 %elt, i64 %idx
37+
ret <vscale x 2 x i64> %d
38+
}
39+
40+
define <vscale x 4 x i32> @insert_vscale_4_i32_zero(<vscale x 4 x i32> %vec, i32 %elt) {
41+
; CHECK-SD-LABEL: insert_vscale_4_i32_zero:
42+
; CHECK-SD: // %bb.0: // %entry
43+
; CHECK-SD-NEXT: ptrue p0.s, vl1
44+
; CHECK-SD-NEXT: mov z0.s, p0/m, w0
45+
; CHECK-SD-NEXT: ret
46+
;
47+
; CHECK-GI-LABEL: insert_vscale_4_i32_zero:
48+
; CHECK-GI: // %bb.0: // %entry
49+
; CHECK-GI-NEXT: mov w8, wzr
50+
; CHECK-GI-NEXT: index z1.s, #0, #1
51+
; CHECK-GI-NEXT: ptrue p0.s
52+
; CHECK-GI-NEXT: mov z2.s, w8
53+
; CHECK-GI-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
54+
; CHECK-GI-NEXT: mov z0.s, p0/m, w0
55+
; CHECK-GI-NEXT: ret
56+
entry:
57+
%d = insertelement <vscale x 4 x i32> %vec, i32 %elt, i64 0
58+
ret <vscale x 4 x i32> %d
59+
}
60+
61+
define <vscale x 4 x i32> @insert_vscale_4_i32(<vscale x 4 x i32> %vec, i32 %elt, i64 %idx) {
62+
; CHECK-LABEL: insert_vscale_4_i32:
63+
; CHECK: // %bb.0: // %entry
64+
; CHECK-NEXT: index z1.s, #0, #1
65+
; CHECK-NEXT: mov z2.s, w1
66+
; CHECK-NEXT: ptrue p0.s
67+
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
68+
; CHECK-NEXT: mov z0.s, p0/m, w0
69+
; CHECK-NEXT: ret
70+
entry:
71+
%d = insertelement <vscale x 4 x i32> %vec, i32 %elt, i64 %idx
72+
ret <vscale x 4 x i32> %d
73+
}
74+
75+
define <vscale x 8 x i16> @insert_vscale_8_i16_zero(<vscale x 8 x i16> %vec, i16 %elt) {
76+
; CHECK-LABEL: insert_vscale_8_i16_zero:
77+
; CHECK: // %bb.0: // %entry
78+
; CHECK-NEXT: ptrue p0.h, vl1
79+
; CHECK-NEXT: mov z0.h, p0/m, w0
80+
; CHECK-NEXT: ret
81+
entry:
82+
%d = insertelement <vscale x 8 x i16> %vec, i16 %elt, i64 0
83+
ret <vscale x 8 x i16> %d
84+
}
85+
86+
define <vscale x 8 x i16> @insert_vscale_8_i16(<vscale x 8 x i16> %vec, i16 %elt, i64 %idx) {
87+
; CHECK-LABEL: insert_vscale_8_i16:
88+
; CHECK: // %bb.0: // %entry
89+
; CHECK-NEXT: index z1.h, #0, #1
90+
; CHECK-NEXT: mov z2.h, w1
91+
; CHECK-NEXT: ptrue p0.h
92+
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
93+
; CHECK-NEXT: mov z0.h, p0/m, w0
94+
; CHECK-NEXT: ret
95+
entry:
96+
%d = insertelement <vscale x 8 x i16> %vec, i16 %elt, i64 %idx
97+
ret <vscale x 8 x i16> %d
98+
}
99+
100+
define <vscale x 16 x i8> @insert_vscale_16_i8_zero(<vscale x 16 x i8> %vec, i8 %elt) {
101+
; CHECK-LABEL: insert_vscale_16_i8_zero:
102+
; CHECK: // %bb.0: // %entry
103+
; CHECK-NEXT: ptrue p0.b, vl1
104+
; CHECK-NEXT: mov z0.b, p0/m, w0
105+
; CHECK-NEXT: ret
106+
entry:
107+
%d = insertelement <vscale x 16 x i8> %vec, i8 %elt, i64 0
108+
ret <vscale x 16 x i8> %d
109+
}
110+
111+
define <vscale x 16 x i8> @insert_vscale_16_i8(<vscale x 16 x i8> %vec, i8 %elt, i64 %idx) {
112+
; CHECK-LABEL: insert_vscale_16_i8:
113+
; CHECK: // %bb.0: // %entry
114+
; CHECK-NEXT: index z1.b, #0, #1
115+
; CHECK-NEXT: mov z2.b, w1
116+
; CHECK-NEXT: ptrue p0.b
117+
; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
118+
; CHECK-NEXT: mov z0.b, p0/m, w0
119+
; CHECK-NEXT: ret
120+
entry:
121+
%d = insertelement <vscale x 16 x i8> %vec, i8 %elt, i64 %idx
122+
ret <vscale x 16 x i8> %d
123+
}

llvm/test/CodeGen/AArch64/extract-vector-elt.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1099,5 +1099,3 @@ loop:
10991099
ret:
11001100
ret i32 %3
11011101
}
1102-
1103-

0 commit comments

Comments
 (0)