[AArch64][GlobalISel] Legalize Insert vector element #81453
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

Changes

This attempts to standardize and extend some of the insert vector element lowering. Most notably:
- More types are handled by splitting illegal vectors.
- The index type for G_INSERT_VECTOR_ELT is canonicalized to TLI.getVectorIdxTy(), similar to extract_vector_elt.
- Some of the existing patterns now have the index type specified to make sure they can apply to GISel too.
- The C++ selection code has been removed, relying on tablegen patterns.
- G_INSERT_VECTOR_ELT with small GPR input elements are pre-selected in reg-bank-select to use an i32 type, allowing the existing patterns to apply.
- Variable index inserts are lowered in post-legalizer lowering, expanding into a stack store and reload.

Patch is 88.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/81453.diff

16 Files Affected:
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 6bc19421fb0169..1ed4f03fc9cd4f 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -141,6 +141,7 @@ def : GINodeEquiv<G_CTLZ_ZERO_UNDEF, ctlz_zero_undef>;
def : GINodeEquiv<G_CTTZ_ZERO_UNDEF, cttz_zero_undef>;
def : GINodeEquiv<G_CTPOP, ctpop>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, extractelt>;
+def : GINodeEquiv<G_INSERT_VECTOR_ELT, vector_insert>;
def : GINodeEquiv<G_CONCAT_VECTORS, concat_vectors>;
def : GINodeEquiv<G_BUILD_VECTOR, build_vector>;
def : GINodeEquiv<G_FCEIL, fceil>;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index c1d8e890a66edb..8d122673888a0c 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2969,7 +2969,22 @@ bool IRTranslator::translateInsertElement(const User &U,
Register Res = getOrCreateVReg(U);
Register Val = getOrCreateVReg(*U.getOperand(0));
Register Elt = getOrCreateVReg(*U.getOperand(1));
- Register Idx = getOrCreateVReg(*U.getOperand(2));
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+ unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
+ Register Idx;
+ if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(2))) {
+ if (CI->getBitWidth() != PreferredVecIdxWidth) {
+ APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth);
+ auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
+ Idx = getOrCreateVReg(*NewIdxCI);
+ }
+ }
+ if (!Idx)
+ Idx = getOrCreateVReg(*U.getOperand(2));
+ if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
+ const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
+ Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0);
+ }
MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
return true;
}
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index fdea974d4540a0..3e8c410c46c7e7 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -114,6 +114,13 @@ def ext: GICombineRule <
(apply [{ applyEXT(*${root}, ${matchinfo}); }])
>;
+def insertelt_nonconst: GICombineRule <
+ (defs root:$root, shuffle_matchdata:$matchinfo),
+ (match (wip_match_opcode G_INSERT_VECTOR_ELT):$root,
+ [{ return matchNonConstInsert(*${root}, MRI); }]),
+ (apply [{ applyNonConstInsert(*${root}, MRI, B); }])
+>;
+
def shuf_to_ins_matchdata : GIDefMatchData<"std::tuple<Register, int, Register, int>">;
def shuf_to_ins: GICombineRule <
(defs root:$root, shuf_to_ins_matchdata:$matchinfo),
@@ -140,8 +147,7 @@ def form_duplane : GICombineRule <
>;
def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
- form_duplane,
- shuf_to_ins]>;
+ form_duplane, shuf_to_ins]>;
// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's
def vector_unmerge_lowering : GICombineRule <
@@ -269,7 +275,7 @@ def AArch64PostLegalizerLowering
lower_vector_fcmp, form_truncstore,
vector_sext_inreg_to_shift,
unmerge_ext_to_unmerge, lower_mull,
- vector_unmerge_lowering]> {
+ vector_unmerge_lowering, insertelt_nonconst]> {
}
// Post-legalization combines which are primarily optimizations.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 0002db52b1995c..de94cf64c9801c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -547,10 +547,10 @@ let Predicates = [HasLSE] in {
let Predicates = [HasRCPC3, HasNEON] in {
// LDAP1 loads
def : Pat<(vector_insert (v2i64 VecListOne128:$Rd),
- (i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)), VectorIndexD:$idx),
+ (i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)), (i64 VectorIndexD:$idx)),
(LDAP1 VecListOne128:$Rd, VectorIndexD:$idx, GPR64sp:$Rn)>;
def : Pat<(vector_insert (v2f64 VecListOne128:$Rd),
- (f64 (bitconvert (i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)))), VectorIndexD:$idx),
+ (f64 (bitconvert (i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)))), (i64 VectorIndexD:$idx)),
(LDAP1 VecListOne128:$Rd, VectorIndexD:$idx, GPR64sp:$Rn)>;
def : Pat<(v1i64 (scalar_to_vector
(i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)))),
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 10ad5b1f8f2580..85722e25bfc970 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -7982,7 +7982,7 @@ class SIMDInsFromMain<string size, ValueType vectype,
"|" # size # "\t$Rd$idx, $Rn}",
"$Rd = $dst",
[(set V128:$dst,
- (vector_insert (vectype V128:$Rd), regtype:$Rn, idxtype:$idx))]> {
+ (vector_insert (vectype V128:$Rd), regtype:$Rn, (i64 idxtype:$idx)))]> {
let Inst{14-11} = 0b0011;
}
@@ -7996,8 +7996,8 @@ class SIMDInsFromElement<string size, ValueType vectype,
[(set V128:$dst,
(vector_insert
(vectype V128:$Rd),
- (elttype (vector_extract (vectype V128:$Rn), idxtype:$idx2)),
- idxtype:$idx))]>;
+ (elttype (vector_extract (vectype V128:$Rn), (i64 idxtype:$idx2))),
+ (i64 idxtype:$idx)))]>;
class SIMDInsMainMovAlias<string size, Instruction inst,
RegisterClass regtype, Operand idxtype>
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 9c3a6927d043ba..51818192c6b773 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6540,6 +6540,15 @@ def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndex
VectorIndexB:$imm, GPR32:$Rm),
dsub)>;
+def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i8 FPR8:$Rm), (i64 VectorIndexB:$imm))),
+ (EXTRACT_SUBREG
+ (INSvi8lane (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
+ VectorIndexB:$imm, (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR8:$Rm, bsub)), (i64 0)),
+ dsub)>;
+def : Pat<(v16i8 (vector_insert (v16i8 V128:$Rn), (i8 FPR8:$Rm), (i64 VectorIndexB:$imm))),
+ (INSvi8lane V128:$Rn, VectorIndexB:$imm,
+ (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR8:$Rm, bsub)), (i64 0))>;
+
// Copy an element at a constant index in one vector into a constant indexed
// element of another.
// FIXME refactor to a shared class/dev parameterized on vector type, vector
@@ -6572,26 +6581,26 @@ def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
ValueType VTScal, Instruction INS> {
def : Pat<(VT128 (vector_insert V128:$src,
- (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
- imm:$Immd)),
+ (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
+ (i64 imm:$Immd))),
(INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;
def : Pat<(VT128 (vector_insert V128:$src,
- (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
- imm:$Immd)),
+ (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
+ (i64 imm:$Immd))),
(INS V128:$src, imm:$Immd,
(SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
def : Pat<(VT64 (vector_insert V64:$src,
- (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
- imm:$Immd)),
+ (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
+ (i64 imm:$Immd))),
(EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
imm:$Immd, V128:$Rn, imm:$Immn),
dsub)>;
def : Pat<(VT64 (vector_insert V64:$src,
- (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
- imm:$Immd)),
+ (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
+ (i64 imm:$Immd))),
(EXTRACT_SUBREG
(INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
(SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
@@ -6610,14 +6619,14 @@ defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>;
// Insert from bitcast
// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
-def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
+def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), (i64 imm:$Immd))),
(INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>;
-def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
+def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), (i64 imm:$Immd))),
(EXTRACT_SUBREG
(INSvi32lane (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)),
imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0),
dsub)>;
-def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), imm:$Immd)),
+def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), (i64 imm:$Immd))),
(INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;
// bitcast of an extract
@@ -7999,7 +8008,7 @@ def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction LD1>
: Pat<(vector_insert (VTy VecListOne128:$Rd),
- (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
+ (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
(LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;
def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>;
@@ -8022,14 +8031,14 @@ class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand
VecIndex, ValueType VTy, ValueType STy,
Instruction LD1, SDNodeXForm IdxOp>
: Pat<(vector_insert (VTy VecListOne128:$Rd),
- (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
+ (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
(LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>;
class Ld1Lane64IdxOpPat<SDPatternOperator scalar_load, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction LD1,
SDNodeXForm IdxOp>
: Pat<(vector_insert (VTy VecListOne64:$Rd),
- (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
+ (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
(EXTRACT_SUBREG
(LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
(IdxOp VecIndex:$idx), GPR64sp:$Rn),
@@ -8069,7 +8078,7 @@ let Predicates = [IsNeonAvailable] in {
class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction LD1>
: Pat<(vector_insert (VTy VecListOne64:$Rd),
- (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
+ (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
(EXTRACT_SUBREG
(LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
VecIndex:$idx, GPR64sp:$Rn),
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 9d51a7f7616ddb..d6978bf90a4b21 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -191,7 +191,6 @@ class AArch64InstructionSelector : public InstructionSelector {
MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
MachineIRBuilder &MIRBuilder);
- bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
MachineRegisterInfo &MRI);
/// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
@@ -2121,6 +2120,31 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
}
return false;
}
+ case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ Register InsReg = I.getOperand(2).getReg();
+ LLT SrcTy = MRI.getType(InsReg);
+ if (RBI.getRegBank(InsReg, MRI, TRI)->getID() == AArch64::GPRRegBankID &&
+ SrcTy.getSizeInBits() < 32) {
+ if (auto *MI = MRI.getVRegDef(InsReg)) {
+ if (MI->getOpcode() == TargetOpcode::G_TRUNC &&
+ (MRI.getType(MI->getOperand(1).getReg()).getSizeInBits() == 32 ||
+ MRI.getType(MI->getOperand(1).getReg()).getSizeInBits() == 64)) {
+ I.getOperand(2).setReg(MI->getOperand(1).getReg());
+ return true;
+ }
+ }
+ auto Ext = MIB.buildAnyExt(LLT::scalar(32), InsReg);
+ Register ExtDst = Ext.getReg(0);
+ MRI.setRegBank(ExtDst, RBI.getRegBank(AArch64::GPRRegBankID));
+ if (!select(*Ext)) {
+ LLVM_DEBUG(dbgs() << "Failed to select G_ANYEXT in G_INSERT_VECTOR_ELT");
+ return false;
+ }
+ I.getOperand(2).setReg(ExtDst);
+ return true;
+ }
+ return false;
+ }
default:
return false;
}
@@ -3487,8 +3511,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return selectShuffleVector(I, MRI);
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
return selectExtractElt(I, MRI);
- case TargetOpcode::G_INSERT_VECTOR_ELT:
- return selectInsertElt(I, MRI);
case TargetOpcode::G_CONCAT_VECTORS:
return selectConcatVectors(I, MRI);
case TargetOpcode::G_JUMP_TABLE:
@@ -5319,65 +5341,6 @@ bool AArch64InstructionSelector::selectUSMovFromExtend(
return true;
}
-bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
- MachineRegisterInfo &MRI) {
- assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
-
- // Get information on the destination.
- Register DstReg = I.getOperand(0).getReg();
- const LLT DstTy = MRI.getType(DstReg);
- unsigned VecSize = DstTy.getSizeInBits();
-
- // Get information on the element we want to insert into the destination.
- Register EltReg = I.getOperand(2).getReg();
- const LLT EltTy = MRI.getType(EltReg);
- unsigned EltSize = EltTy.getSizeInBits();
- if (EltSize < 8 || EltSize > 64)
- return false;
-
- // Find the definition of the index. Bail out if it's not defined by a
- // G_CONSTANT.
- Register IdxReg = I.getOperand(3).getReg();
- auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
- if (!VRegAndVal)
- return false;
- unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
-
- // Perform the lane insert.
- Register SrcReg = I.getOperand(1).getReg();
- const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
-
- if (VecSize < 128) {
- // If the vector we're inserting into is smaller than 128 bits, widen it
- // to 128 to do the insert.
- MachineInstr *ScalarToVec =
- emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
- if (!ScalarToVec)
- return false;
- SrcReg = ScalarToVec->getOperand(0).getReg();
- }
-
- // Create an insert into a new FPR128 register.
- // Note that if our vector is already 128 bits, we end up emitting an extra
- // register.
- MachineInstr *InsMI =
- emitLaneInsert(std::nullopt, SrcReg, EltReg, LaneIdx, EltRB, MIB);
-
- if (VecSize < 128) {
- // If we had to widen to perform the insert, then we have to demote back to
- // the original size to get the result we want.
- if (!emitNarrowVector(DstReg, InsMI->getOperand(0).getReg(), MIB, MRI))
- return false;
- } else {
- // No widening needed.
- InsMI->getOperand(0).setReg(DstReg);
- constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
- }
-
- I.eraseFromParent();
- return true;
-}
-
MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
unsigned int Op;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index cbf5655706e694..b545c04e1652a4 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -870,8 +870,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(1, p0, 2);
getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
- .legalIf(typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64}))
- .widenVectorEltsToVectorMinSize(0, 64);
+ .legalIf(typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
+ .widenVectorEltsToVectorMinSize(0, 64)
+ .moreElementsToNextPow2(0)
+ .clampNumElements(0, v8s8, v16s8)
+ .clampNumElements(0, v4s16, v8s16)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampMaxNumElements(0, s64, 2)
+ .clampMaxNumElements(0, p0, 2);
getActionDefinitionsBuilder(G_BUILD_VECTOR)
.legalFor({{v8s8, s8},
@@ -2001,11 +2007,11 @@ bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
// Widen In1 and In2 to 128 bits. We want these to eventually become
// INSERT_SUBREGs.
auto Undef = MIRBuilder.buildUndef(VecTy);
- auto Zero = MIRBuilder.buildConstant(DstTy, 0);
+ auto ZeroIdx = MIRBuilder.buildConstant(LLT::scalar(64), 0);
auto Ins1 = MIRBuilder.buildInsertVectorElement(
- VecTy, Undef, MI.getOperand(1).getReg(), Zero);
+ VecTy, Undef, MI.getOperand(1).getReg(), ZeroIdx);
auto Ins2 = MIRBuilder.buildInsertVectorElement(
- VecTy, Undef, MI.getOperand(2).getReg(), Zero);
+ VecTy, Undef, MI.getOperand(2).getReg(), ZeroIdx);
// Construct the mask.
auto Mask = MIRBuilder.buildConstant(VecTy, EltMask);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 9bc5815ae05371..a12cac010f036e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
@@ -475,6 +476,59 @@ void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
MI.eraseFromParent();
}
+bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
+ assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
+
+ auto ValAndVReg =
+ getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
+ return !ValAndVReg;
+}
+
+void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &Builder) {
+ assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
+ bool InsertVal = true;
+ Builder.setInstrAndDebugLoc(MI);
+
+ Register Offset = MI.getOperand(3).getReg();
+ LLT VecTy = MRI.getType(MI.getOperand(0).getReg());
+...
[truncated]
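The diff is cut off partway through applyNonConstInsert. For orientation in the review comments below, here is a minimal sketch of the stack store/reload lowering the description names for variable-index inserts — the function shape, helper choices, and alignment values are assumptions, not the PR's exact code:

// Sketch only: lower a variable-index G_INSERT_VECTOR_ELT by spilling the
// vector to a stack slot, overwriting the selected lane in memory, and
// reloading the whole vector. All names below are illustrative.
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

static void lowerNonConstInsertSketch(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      MachineIRBuilder &B) {
  Register Dst = MI.getOperand(0).getReg();
  Register Vec = MI.getOperand(1).getReg();
  Register Elt = MI.getOperand(2).getReg();
  Register Idx = MI.getOperand(3).getReg();
  LLT VecTy = MRI.getType(Dst);
  LLT EltTy = MRI.getType(Elt);
  LLT IdxTy = MRI.getType(Idx);
  LLT PtrTy = LLT::pointer(0, 64);

  // Create a stack slot large enough for the vector and spill it there.
  MachineFunction &MF = B.getMF();
  int FI = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
                                               Align(8), false);
  auto StackTemp = B.buildFrameIndex(PtrTy, FI);
  B.buildStore(Vec, StackTemp, MachinePointerInfo(), Align(8));

  // Clamp the index so the lane store stays inside the slot, then scale it
  // by the element size to form a byte offset into the slot.
  auto Mask = B.buildConstant(IdxTy, VecTy.getNumElements() - 1);
  auto Clamped = B.buildAnd(IdxTy, Idx, Mask);
  auto EltSize = B.buildConstant(IdxTy, EltTy.getSizeInBytes());
  auto Offset = B.buildMul(IdxTy, Clamped, EltSize);
  auto EltPtr = B.buildPtrAdd(PtrTy, StackTemp, Offset);

  // Overwrite the selected lane, then reload the updated vector.
  B.buildStore(Elt, EltPtr, MachinePointerInfo(), Align(1));
  B.buildLoad(Dst, StackTemp, MachinePointerInfo(), Align(8));
  MI.eraseFromParent();
}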
✅ With the latest revision this PR passed the C/C++ code formatter.
@@ -2121,6 +2120,31 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  }
  return false;
}
case TargetOpcode::G_INSERT_VECTOR_ELT: {
  Register InsReg = I.getOperand(2).getReg();
Please add a comment documenting what this is trying to do and why.
I've added a comment and rebased - thanks.
Ping - I've rebased and fixed the RISCV tests. Thanks.
Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
    .getReg(0);

if (InsertVal) { |
InsertVal is set to true. Why can't this be unconditional then? Is this function missing something?
I forget where I got this code from now, but it was to allow it to be used with INSERTs and EXTRACTs. There will hopefully be a follow-up patch to apply this to extracts too, if this one can get through review. It was smaller IIRC, but I can remove this for the moment and re-add it there. Thanks.
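A sketch of the shape being described — the same stack-slot expansion parameterized by a flag so it could serve both opcodes (names hypothetical; the extract side is the possible follow-up, not part of this patch):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Hypothetical helper: StackTemp points at the spilled vector and EltPtr at
// the selected lane, as computed earlier in the lowering.
static void finishStackLowering(MachineIRBuilder &B, bool InsertVal,
                                Register Dst, Register EltToInsert,
                                Register StackTemp, Register EltPtr) {
  if (InsertVal) {
    // G_INSERT_VECTOR_ELT: overwrite the lane, then reload the whole vector.
    B.buildStore(EltToInsert, EltPtr, MachinePointerInfo(), Align(1));
    B.buildLoad(Dst, StackTemp, MachinePointerInfo(), Align(8));
  } else {
    // G_EXTRACT_VECTOR_ELT: just load the selected lane back out.
    B.buildLoad(Dst, EltPtr, MachinePointerInfo(), Align(1));
  }
}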
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: mov sp, x29
; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
Do we know why there is codegen difference between SDAG and Global ISel?
There are often a lot of little differences between SDAG and what is currently implemented in GISel. I think in this case the alignment is forced to be higher than it needs to be, so the stack gets realigned, due to this code:
Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
Align MinAlign) const {
// FIXME: We're missing a way to go back from LLT to llvm::Type to query the
// datalayout for the preferred alignment. Also there should be a target hook
// for this to allow targets to reduce the alignment and ignore the
// datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
// the type.
return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
}
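(Anything larger than 16 bytes — a 32-byte temporary gets Align(32) from this, for instance — exceeds AArch64's 16-byte stack alignment and forces the frame realignment visible in the CHECK-GI lines above.)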
✅ With the latest revision this PR passed the Python code formatter.
bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  auto ValAndVReg =
      getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
  return !ValAndVReg;
}

void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &Builder) {
  assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
  bool InsertVal = true;
  Builder.setInstrAndDebugLoc(MI);

  Register Offset = MI.getOperand(3).getReg();
  LLT VecTy = MRI.getType(MI.getOperand(0).getReg());
  LLT EltTy = MRI.getType(MI.getOperand(2).getReg());
  LLT IdxTy = MRI.getType(MI.getOperand(3).getReg());
There were G_* wrappers added for inserts/extracts in 8bb9443, which you can use now.
I was hoping to make this method useful for both G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT in a follow-up, but have made it use GInsertVectorElement in this one.
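With those wrappers, the operand accesses above read as follows — a sketch, assuming the accessor names from GenericMachineInstrs.h:

#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// Sketch: the typed wrapper replaces the raw getOperand() indices above.
// Accessor names are assumed from the wrapper class, not copied from the PR.
static void applyNonConstInsertSketch(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      MachineIRBuilder &Builder) {
  auto &Insert = cast<GInsertVectorElement>(MI);
  Builder.setInstrAndDebugLoc(MI);

  Register Offset = Insert.getIndexReg();
  LLT VecTy = MRI.getType(Insert.getReg(0));
  LLT EltTy = MRI.getType(Insert.getElementReg());
  LLT IdxTy = MRI.getType(Insert.getIndexReg());
  // ... the rest of the lowering is unchanged.
}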
// Create a stack slot and store the vector into it
MachineFunction &MF = Builder.getMF();
int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
                                                   Align(8), false);
Should we set the alignment of the slot to be the vector size so 128b vectors get aligned on 16 byte boundaries?
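The suggested change would be a small tweak to the lines quoted above — a sketch:

// Per the review suggestion: align the slot to the vector's own size (so a
// 128-bit vector lands on a 16-byte boundary) instead of a fixed Align(8).
int FrameIdx = MF.getFrameInfo().CreateStackObject(
    VecTy.getSizeInBytes(), Align(VecTy.getSizeInBytes()), false);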
This attempts to standardize and extend some of the insert vector element lowering. Most notably:
- More types are handled by splitting illegal vectors.
- The index type for G_INSERT_VECTOR_ELT is canonicalized to TLI.getVectorIdxTy(), similar to extract_vector_elt.
- Some of the existing patterns now have the index type specified to make sure they can apply to GISel too.
- The C++ selection code has been removed, relying on tablegen patterns.
- G_INSERT_VECTOR_ELT with small GPR input elements are pre-selected in reg-bank-select to use an i32 type, allowing the existing patterns to apply.
- Variable index inserts are lowered in post-legalizer lowering, expanding into a stack store and reload.