Skip to content

Commit ac321cb

Browse files
authored
[AArch64][GlobalISel] Legalize Insert vector element (#81453)
This attempts to standardize and extend some of the insert vector element lowering. Most notably:
- More types are handled by splitting illegal vectors.
- The index type for G_INSERT_VECTOR_ELT is canonicalized to TLI.getVectorIdxTy(), similar to extract_vector_element.
- Some of the existing patterns now have the index type specified to make sure they can apply to GISel too.
- The C++ selection code has been removed, relying on tablegen patterns.
- G_INSERT_VECTOR_ELT with small GPR input elements is pre-selected to use an i32 type, allowing the existing patterns to apply.
- Variable index inserts are lowered in post-legalizer lowering, expanding into a stack store and reload.
1 parent 2084a07 commit ac321cb

37 files changed

+1759
-1319
lines changed

llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "llvm/CodeGen/MachineBasicBlock.h"
1818
#include "llvm/CodeGen/MachineInstrBuilder.h"
1919
#include "llvm/CodeGen/MachineRegisterInfo.h"
20+
#include "llvm/CodeGen/TargetLowering.h"
2021
#include "llvm/CodeGen/TargetOpcodes.h"
2122
#include "llvm/IR/DebugLoc.h"
2223
#include "llvm/IR/Module.h"
@@ -1300,8 +1301,10 @@ class MachineIRBuilder {
13001301
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res,
13011302
const SrcOp &Val,
13021303
const int Idx) {
1303-
return buildExtractVectorElement(Res, Val,
1304-
buildConstant(LLT::scalar(64), Idx));
1304+
auto TLI = getMF().getSubtarget().getTargetLowering();
1305+
unsigned VecIdxWidth = TLI->getVectorIdxTy(getDataLayout()).getSizeInBits();
1306+
return buildExtractVectorElement(
1307+
Res, Val, buildConstant(LLT::scalar(VecIdxWidth), Idx));
13051308
}
13061309

13071310
/// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx

llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ def : GINodeEquiv<G_CTLZ_ZERO_UNDEF, ctlz_zero_undef>;
142142
def : GINodeEquiv<G_CTTZ_ZERO_UNDEF, cttz_zero_undef>;
143143
def : GINodeEquiv<G_CTPOP, ctpop>;
144144
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, extractelt>;
145+
def : GINodeEquiv<G_INSERT_VECTOR_ELT, vector_insert>;
145146
def : GINodeEquiv<G_CONCAT_VECTORS, concat_vectors>;
146147
def : GINodeEquiv<G_BUILD_VECTOR, build_vector>;
147148
def : GINodeEquiv<G_FCEIL, fceil>;

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3087,7 +3087,21 @@ bool IRTranslator::translateInsertElement(const User &U,
30873087
Register Res = getOrCreateVReg(U);
30883088
Register Val = getOrCreateVReg(*U.getOperand(0));
30893089
Register Elt = getOrCreateVReg(*U.getOperand(1));
3090-
Register Idx = getOrCreateVReg(*U.getOperand(2));
3090+
unsigned PreferredVecIdxWidth = TLI->getVectorIdxTy(*DL).getSizeInBits();
3091+
Register Idx;
3092+
if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(2))) {
3093+
if (CI->getBitWidth() != PreferredVecIdxWidth) {
3094+
APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth);
3095+
auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
3096+
Idx = getOrCreateVReg(*NewIdxCI);
3097+
}
3098+
}
3099+
if (!Idx)
3100+
Idx = getOrCreateVReg(*U.getOperand(2));
3101+
if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
3102+
const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
3103+
Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0);
3104+
}
30913105
MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
30923106
return true;
30933107
}

llvm/lib/CodeGen/MachineVerifier.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
#include "llvm/CodeGen/SlotIndexes.h"
5656
#include "llvm/CodeGen/StackMaps.h"
5757
#include "llvm/CodeGen/TargetInstrInfo.h"
58+
#include "llvm/CodeGen/TargetLowering.h"
5859
#include "llvm/CodeGen/TargetOpcodes.h"
5960
#include "llvm/CodeGen/TargetRegisterInfo.h"
6061
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -1788,6 +1789,60 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
17881789

17891790
break;
17901791
}
1792+
case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1793+
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
1794+
LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
1795+
LLT IdxTy = MRI->getType(MI->getOperand(2).getReg());
1796+
1797+
if (!DstTy.isScalar() && !DstTy.isPointer()) {
1798+
report("Destination type must be a scalar or pointer", MI);
1799+
break;
1800+
}
1801+
1802+
if (!SrcTy.isVector()) {
1803+
report("First source must be a vector", MI);
1804+
break;
1805+
}
1806+
1807+
auto TLI = MF->getSubtarget().getTargetLowering();
1808+
if (IdxTy.getSizeInBits() !=
1809+
TLI->getVectorIdxTy(MF->getDataLayout()).getFixedSizeInBits()) {
1810+
report("Index type must match VectorIdxTy", MI);
1811+
break;
1812+
}
1813+
1814+
break;
1815+
}
1816+
case TargetOpcode::G_INSERT_VECTOR_ELT: {
1817+
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
1818+
LLT VecTy = MRI->getType(MI->getOperand(1).getReg());
1819+
LLT ScaTy = MRI->getType(MI->getOperand(2).getReg());
1820+
LLT IdxTy = MRI->getType(MI->getOperand(3).getReg());
1821+
1822+
if (!DstTy.isVector()) {
1823+
report("Destination type must be a vector", MI);
1824+
break;
1825+
}
1826+
1827+
if (VecTy != DstTy) {
1828+
report("Destination type and vector type must match", MI);
1829+
break;
1830+
}
1831+
1832+
if (!ScaTy.isScalar() && !ScaTy.isPointer()) {
1833+
report("Inserted element must be a scalar or pointer", MI);
1834+
break;
1835+
}
1836+
1837+
auto TLI = MF->getSubtarget().getTargetLowering();
1838+
if (IdxTy.getSizeInBits() !=
1839+
TLI->getVectorIdxTy(MF->getDataLayout()).getFixedSizeInBits()) {
1840+
report("Index type must match VectorIdxTy", MI);
1841+
break;
1842+
}
1843+
1844+
break;
1845+
}
17911846
case TargetOpcode::G_DYN_STACKALLOC: {
17921847
const MachineOperand &DstOp = MI->getOperand(0);
17931848
const MachineOperand &AllocOp = MI->getOperand(1);

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,13 @@ def ext: GICombineRule <
114114
(apply [{ applyEXT(*${root}, ${matchinfo}); }])
115115
>;
116116

117+
def insertelt_nonconst: GICombineRule <
118+
(defs root:$root, shuffle_matchdata:$matchinfo),
119+
(match (wip_match_opcode G_INSERT_VECTOR_ELT):$root,
120+
[{ return matchNonConstInsert(*${root}, MRI); }]),
121+
(apply [{ applyNonConstInsert(*${root}, MRI, B); }])
122+
>;
123+
117124
def shuf_to_ins_matchdata : GIDefMatchData<"std::tuple<Register, int, Register, int>">;
118125
def shuf_to_ins: GICombineRule <
119126
(defs root:$root, shuf_to_ins_matchdata:$matchinfo),
@@ -140,8 +147,7 @@ def form_duplane : GICombineRule <
140147
>;
141148

142149
def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
143-
form_duplane,
144-
shuf_to_ins]>;
150+
form_duplane, shuf_to_ins]>;
145151

146152
// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's
147153
def vector_unmerge_lowering : GICombineRule <
@@ -269,7 +275,7 @@ def AArch64PostLegalizerLowering
269275
lower_vector_fcmp, form_truncstore,
270276
vector_sext_inreg_to_shift,
271277
unmerge_ext_to_unmerge, lower_mull,
272-
vector_unmerge_lowering]> {
278+
vector_unmerge_lowering, insertelt_nonconst]> {
273279
}
274280

275281
// Post-legalization combines which are primarily optimizations.

llvm/lib/Target/AArch64/AArch64InstrAtomics.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -547,10 +547,10 @@ let Predicates = [HasLSE] in {
547547
let Predicates = [HasRCPC3, HasNEON] in {
548548
// LDAP1 loads
549549
def : Pat<(vector_insert (v2i64 VecListOne128:$Rd),
550-
(i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)), VectorIndexD:$idx),
550+
(i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)), (i64 VectorIndexD:$idx)),
551551
(LDAP1 VecListOne128:$Rd, VectorIndexD:$idx, GPR64sp:$Rn)>;
552552
def : Pat<(vector_insert (v2f64 VecListOne128:$Rd),
553-
(f64 (bitconvert (i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)))), VectorIndexD:$idx),
553+
(f64 (bitconvert (i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)))), (i64 VectorIndexD:$idx)),
554554
(LDAP1 VecListOne128:$Rd, VectorIndexD:$idx, GPR64sp:$Rn)>;
555555
def : Pat<(v1i64 (scalar_to_vector
556556
(i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)))),

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7983,7 +7983,7 @@ class SIMDInsFromMain<string size, ValueType vectype,
79837983
"|" # size # "\t$Rd$idx, $Rn}",
79847984
"$Rd = $dst",
79857985
[(set V128:$dst,
7986-
(vector_insert (vectype V128:$Rd), regtype:$Rn, idxtype:$idx))]> {
7986+
(vector_insert (vectype V128:$Rd), regtype:$Rn, (i64 idxtype:$idx)))]> {
79877987
let Inst{14-11} = 0b0011;
79887988
}
79897989

@@ -7997,8 +7997,8 @@ class SIMDInsFromElement<string size, ValueType vectype,
79977997
[(set V128:$dst,
79987998
(vector_insert
79997999
(vectype V128:$Rd),
8000-
(elttype (vector_extract (vectype V128:$Rn), idxtype:$idx2)),
8001-
idxtype:$idx))]>;
8000+
(elttype (vector_extract (vectype V128:$Rn), (i64 idxtype:$idx2))),
8001+
(i64 idxtype:$idx)))]>;
80028002

80038003
class SIMDInsMainMovAlias<string size, Instruction inst,
80048004
RegisterClass regtype, Operand idxtype>

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6601,6 +6601,15 @@ def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndex
66016601
VectorIndexB:$imm, GPR32:$Rm),
66026602
dsub)>;
66036603

6604+
def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i8 FPR8:$Rm), (i64 VectorIndexB:$imm))),
6605+
(EXTRACT_SUBREG
6606+
(INSvi8lane (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
6607+
VectorIndexB:$imm, (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR8:$Rm, bsub)), (i64 0)),
6608+
dsub)>;
6609+
def : Pat<(v16i8 (vector_insert (v16i8 V128:$Rn), (i8 FPR8:$Rm), (i64 VectorIndexB:$imm))),
6610+
(INSvi8lane V128:$Rn, VectorIndexB:$imm,
6611+
(v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR8:$Rm, bsub)), (i64 0))>;
6612+
66046613
// Copy an element at a constant index in one vector into a constant indexed
66056614
// element of another.
66066615
// FIXME refactor to a shared class/dev parameterized on vector type, vector
@@ -6633,26 +6642,26 @@ def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
66336642
multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
66346643
ValueType VTScal, Instruction INS> {
66356644
def : Pat<(VT128 (vector_insert V128:$src,
6636-
(VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
6637-
imm:$Immd)),
6645+
(VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
6646+
(i64 imm:$Immd))),
66386647
(INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;
66396648

66406649
def : Pat<(VT128 (vector_insert V128:$src,
6641-
(VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
6642-
imm:$Immd)),
6650+
(VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
6651+
(i64 imm:$Immd))),
66436652
(INS V128:$src, imm:$Immd,
66446653
(SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
66456654

66466655
def : Pat<(VT64 (vector_insert V64:$src,
6647-
(VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
6648-
imm:$Immd)),
6656+
(VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
6657+
(i64 imm:$Immd))),
66496658
(EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
66506659
imm:$Immd, V128:$Rn, imm:$Immn),
66516660
dsub)>;
66526661

66536662
def : Pat<(VT64 (vector_insert V64:$src,
6654-
(VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
6655-
imm:$Immd)),
6663+
(VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
6664+
(i64 imm:$Immd))),
66566665
(EXTRACT_SUBREG
66576666
(INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
66586667
(SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
@@ -6671,14 +6680,14 @@ defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>;
66716680

66726681
// Insert from bitcast
66736682
// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
6674-
def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
6683+
def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), (i64 imm:$Immd))),
66756684
(INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>;
6676-
def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
6685+
def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), (i64 imm:$Immd))),
66776686
(EXTRACT_SUBREG
66786687
(INSvi32lane (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)),
66796688
imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0),
66806689
dsub)>;
6681-
def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), imm:$Immd)),
6690+
def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), (i64 imm:$Immd))),
66826691
(INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;
66836692

66846693
// bitcast of an extract
@@ -8100,7 +8109,7 @@ def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
81008109
class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
81018110
ValueType VTy, ValueType STy, Instruction LD1>
81028111
: Pat<(vector_insert (VTy VecListOne128:$Rd),
8103-
(STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
8112+
(STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
81048113
(LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;
81058114

81068115
def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>;
@@ -8123,14 +8132,14 @@ class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand
81238132
VecIndex, ValueType VTy, ValueType STy,
81248133
Instruction LD1, SDNodeXForm IdxOp>
81258134
: Pat<(vector_insert (VTy VecListOne128:$Rd),
8126-
(STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
8135+
(STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
81278136
(LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>;
81288137

81298138
class Ld1Lane64IdxOpPat<SDPatternOperator scalar_load, Operand VecIndex,
81308139
ValueType VTy, ValueType STy, Instruction LD1,
81318140
SDNodeXForm IdxOp>
81328141
: Pat<(vector_insert (VTy VecListOne64:$Rd),
8133-
(STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
8142+
(STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
81348143
(EXTRACT_SUBREG
81358144
(LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
81368145
(IdxOp VecIndex:$idx), GPR64sp:$Rn),
@@ -8170,7 +8179,7 @@ let Predicates = [IsNeonAvailable] in {
81708179
class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
81718180
ValueType VTy, ValueType STy, Instruction LD1>
81728181
: Pat<(vector_insert (VTy VecListOne64:$Rd),
8173-
(STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
8182+
(STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
81748183
(EXTRACT_SUBREG
81758184
(LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
81768185
VecIndex:$idx, GPR64sp:$Rn),

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 0 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,6 @@ class AArch64InstructionSelector : public InstructionSelector {
191191
MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192192
MachineIRBuilder &MIRBuilder);
193193

194-
bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
195194
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
196195
MachineRegisterInfo &MRI);
197196
/// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
@@ -3498,8 +3497,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
34983497
return selectShuffleVector(I, MRI);
34993498
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
35003499
return selectExtractElt(I, MRI);
3501-
case TargetOpcode::G_INSERT_VECTOR_ELT:
3502-
return selectInsertElt(I, MRI);
35033500
case TargetOpcode::G_CONCAT_VECTORS:
35043501
return selectConcatVectors(I, MRI);
35053502
case TargetOpcode::G_JUMP_TABLE:
@@ -5330,65 +5327,6 @@ bool AArch64InstructionSelector::selectUSMovFromExtend(
53305327
return true;
53315328
}
53325329

5333-
bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
5334-
MachineRegisterInfo &MRI) {
5335-
assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
5336-
5337-
// Get information on the destination.
5338-
Register DstReg = I.getOperand(0).getReg();
5339-
const LLT DstTy = MRI.getType(DstReg);
5340-
unsigned VecSize = DstTy.getSizeInBits();
5341-
5342-
// Get information on the element we want to insert into the destination.
5343-
Register EltReg = I.getOperand(2).getReg();
5344-
const LLT EltTy = MRI.getType(EltReg);
5345-
unsigned EltSize = EltTy.getSizeInBits();
5346-
if (EltSize < 8 || EltSize > 64)
5347-
return false;
5348-
5349-
// Find the definition of the index. Bail out if it's not defined by a
5350-
// G_CONSTANT.
5351-
Register IdxReg = I.getOperand(3).getReg();
5352-
auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
5353-
if (!VRegAndVal)
5354-
return false;
5355-
unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
5356-
5357-
// Perform the lane insert.
5358-
Register SrcReg = I.getOperand(1).getReg();
5359-
const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5360-
5361-
if (VecSize < 128) {
5362-
// If the vector we're inserting into is smaller than 128 bits, widen it
5363-
// to 128 to do the insert.
5364-
MachineInstr *ScalarToVec =
5365-
emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
5366-
if (!ScalarToVec)
5367-
return false;
5368-
SrcReg = ScalarToVec->getOperand(0).getReg();
5369-
}
5370-
5371-
// Create an insert into a new FPR128 register.
5372-
// Note that if our vector is already 128 bits, we end up emitting an extra
5373-
// register.
5374-
MachineInstr *InsMI =
5375-
emitLaneInsert(std::nullopt, SrcReg, EltReg, LaneIdx, EltRB, MIB);
5376-
5377-
if (VecSize < 128) {
5378-
// If we had to widen to perform the insert, then we have to demote back to
5379-
// the original size to get the result we want.
5380-
if (!emitNarrowVector(DstReg, InsMI->getOperand(0).getReg(), MIB, MRI))
5381-
return false;
5382-
} else {
5383-
// No widening needed.
5384-
InsMI->getOperand(0).setReg(DstReg);
5385-
constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
5386-
}
5387-
5388-
I.eraseFromParent();
5389-
return true;
5390-
}
5391-
53925330
MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
53935331
Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
53945332
unsigned int Op;

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -886,9 +886,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
886886
.clampMaxNumElements(1, p0, 2);
887887

888888
getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
889-
.legalIf(typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64}))
889+
.legalIf(
890+
typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
890891
.moreElementsToNextPow2(0)
891-
.widenVectorEltsToVectorMinSize(0, 64);
892+
.widenVectorEltsToVectorMinSize(0, 64)
893+
.clampNumElements(0, v8s8, v16s8)
894+
.clampNumElements(0, v4s16, v8s16)
895+
.clampNumElements(0, v2s32, v4s32)
896+
.clampMaxNumElements(0, s64, 2)
897+
.clampMaxNumElements(0, p0, 2);
892898

893899
getActionDefinitionsBuilder(G_BUILD_VECTOR)
894900
.legalFor({{v8s8, s8},

0 commit comments

Comments
 (0)