Skip to content

Commit e2b473d

Browse files
fixup! move to postlegalize lowering
1 parent e5a8236 commit e2b473d

File tree

6 files changed

+597
-370
lines changed

6 files changed

+597
-370
lines changed

llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp

Lines changed: 2 additions & 150 deletions
Original file line numberDiff line numberDiff line change
@@ -582,9 +582,9 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
582582
SplatActions.clampScalar(1, sXLen, sXLen);
583583

584584
getActionDefinitionsBuilder(G_INSERT_SUBVECTOR)
585-
.customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
585+
.legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
586586
typeIsLegalBoolVec(1, BoolVecTys, ST)))
587-
.customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
587+
.legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
588588
typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
589589

590590
getLegacyLegalizerInfo().computeTables();
@@ -921,152 +921,6 @@ bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
921921
return true;
922922
}
923923

924-
// Returns the LMUL=1 scalable vector type with the same element type as
// VecTy, i.e. exactly one full RVV register's worth of VecTy's elements.
static LLT getLMUL1Ty(LLT VecTy) {
  const uint64_t EltBits = VecTy.getElementType().getSizeInBits();
  assert(EltBits <= 64 && "Unexpected vector LLT");
  return LLT::scalable_vector(RISCV::RVVBitsPerBlock / EltBits,
                              VecTy.getElementType());
}
931-
932-
/// Legalize G_INSERT_SUBVECTOR by lowering it to RVV slide operations.
///
/// When the insertion does not naturally align to a vector register
/// boundary, we extract the nearest LMUL=1 register-sized piece, slide the
/// subvector into place within it (G_VSLIDEUP_VL, or a tail-undisturbed
/// G_VMV_V_V_VL when inserting at offset 0), and insert the result back into
/// the larger register group.
bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
                                                MachineIRBuilder &MIB) const {
  GInsertSubvector &IS = cast<GInsertSubvector>(MI);

  MachineRegisterInfo &MRI = *MIB.getMRI();

  Register Dst = IS.getReg(0);
  Register Src1 = IS.getBigVec();
  Register Src2 = IS.getSubVec();
  uint64_t Idx = IS.getIndexImm();

  LLT BigTy = MRI.getType(Src1);
  LLT LitTy = MRI.getType(Src2);
  Register BigVec = Src1;
  Register LitVec = Src2;

  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
  // vectors and truncate down after the insert.
  if (LitTy.getElementType() == LLT::scalar(1) &&
      (Idx != 0 ||
       MRI.getVRegDef(BigVec)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)) {
    auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
    auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
    if (BigTyMinElts >= 8 && LitTyMinElts >= 8) {
      assert(Idx % 8 == 0 && "Invalid index");
      assert(BigTyMinElts % 8 == 0 && LitTyMinElts % 8 == 0 &&
             "Unexpected mask vector lowering");
      // Reinterpret both vectors as vectors of i8 bytes and scale the index.
      Idx /= 8;
      BigTy = LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8);
      LitTy = LLT::vector(LitTy.getElementCount().divideCoefficientBy(8), 8);
      BigVec = MIB.buildBitcast(BigTy, BigVec).getReg(0);
      LitVec = MIB.buildBitcast(LitTy, LitVec).getReg(0);
    } else {
      // We can't slide this mask vector up indexed by its i1 elements.
      // This poses a problem when we wish to insert a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
      LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
      auto BigZExt = MIB.buildZExt(ExtBigTy, BigVec);
      auto LitZExt = MIB.buildZExt(ExtLitTy, LitVec);
      auto Insert = MIB.buildInsertSubvector(ExtBigTy, BigZExt, LitZExt, Idx);
      // Compare against zero to truncate the i8 result back to an i1 mask.
      auto SplatZero = MIB.buildSplatVector(
          ExtBigTy, MIB.buildConstant(ExtBigTy.getElementType(), 0));
      MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, Insert, SplatZero);
      MI.eraseFromParent();
      return true;
    }
  }

  const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
  MVT LitTyMVT = getMVTForLLT(LitTy);
  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);

  RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(LitTy));
  bool IsSubVecPartReg = !RISCVVType::decodeVLMUL(SubVecLMUL).second;

  // If the Idx has been completely eliminated and this subvector's size is a
  // vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  if (RemIdx == 0 && (!IsSubVecPartReg || MRI.getVRegDef(Src1)->getOpcode() ==
                                              TargetOpcode::G_IMPLICIT_DEF))
    return true;

  // If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.

  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
  const LLT XLenTy(STI.getXLenVT());
  LLT InterLitTy = BigTy;
  Register AlignedExtract = Src1;
  unsigned AlignedIdx = Idx - RemIdx;
  if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
                          getLMUL1Ty(BigTy).getSizeInBits())) {
    InterLitTy = getLMUL1Ty(BigTy);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to a G_EXTRACT on a subreg.
    AlignedExtract =
        MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
  }

  // Place the subvector at offset 0 of an undef InterLitTy vector so the
  // slide source has the same type as the slide destination.
  auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
                                         LitVec, 0);

  auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
  auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());

  // Use tail agnostic policy if we're inserting over InterLitTy's tail.
  ElementCount EndIndex =
      ElementCount::getScalable(RemIdx) + LitTy.getElementCount();
  uint64_t Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
  if (EndIndex == InterLitTy.getElementCount())
    Policy = RISCVII::TAIL_AGNOSTIC;

  // If we're inserting into the lowest elements, use a tail undisturbed
  // vmv.v.v.
  MachineInstrBuilder Inserted;
  if (RemIdx == 0) {
    Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InterLitTy},
                              {AlignedExtract, Insert, VL});
  } else {
    auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
    // Construct the vector length corresponding to RemIdx + length(LitTy).
    VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
    // Slide Insert (not the raw LitVec): the slide operand must have the
    // same type (InterLitTy) as the merge operand and the result.
    Inserted =
        MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InterLitTy},
                       {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
  }

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction. Note we insert the
  // computed Inserted value, not the original LitVec, so the slide result is
  // actually used.
  if (TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits()))
    Inserted = MIB.buildInsertSubvector(BigTy, BigVec, Inserted, AlignedIdx);

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  MIB.buildBitcast(Dst, Inserted);

  MI.eraseFromParent();
  return true;
}
1069-
1070924
bool RISCVLegalizerInfo::legalizeCustom(
1071925
LegalizerHelper &Helper, MachineInstr &MI,
1072926
LostDebugLocObserver &LocObserver) const {
@@ -1137,8 +991,6 @@ bool RISCVLegalizerInfo::legalizeCustom(
1137991
return legalizeExt(MI, MIRBuilder);
1138992
case TargetOpcode::G_SPLAT_VECTOR:
1139993
return legalizeSplatVector(MI, MIRBuilder);
1140-
case TargetOpcode::G_INSERT_SUBVECTOR:
1141-
return legalizeInsertSubvector(MI, MIRBuilder);
1142994
case TargetOpcode::G_LOAD:
1143995
case TargetOpcode::G_STORE:
1144996
return legalizeLoadStore(MI, Helper, MIRBuilder);

llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ class RISCVLegalizerInfo : public LegalizerInfo {
4646
bool legalizeVScale(MachineInstr &MI, MachineIRBuilder &MIB) const;
4747
bool legalizeExt(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const;
4848
bool legalizeSplatVector(MachineInstr &MI, MachineIRBuilder &MIB) const;
49-
bool legalizeInsertSubvector(MachineInstr &MI, MachineIRBuilder &MIB) const;
5049
bool legalizeLoadStore(MachineInstr &MI, LegalizerHelper &Helper,
5150
MachineIRBuilder &MIB) const;
5251
};

llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerLowering.cpp

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,184 @@ namespace {
4141
#include "RISCVGenPostLegalizeGILowering.inc"
4242
#undef GET_GICOMBINER_TYPES
4343

44+
/// Return the scalable vector type occupying exactly one vector register
/// (LMUL = 1) whose element type matches \p VecTy's.
static LLT getLMUL1Ty(LLT VecTy) {
  LLT EltTy = VecTy.getElementType();
  assert(EltTy.getSizeInBits() <= 64 && "Unexpected vector LLT");
  const uint64_t NumElts = RISCV::RVVBitsPerBlock / EltTy.getSizeInBits();
  return LLT::scalable_vector(NumElts, EltTy);
}
51+
52+
/// Return the type of the mask type suitable for masking the provided
53+
/// vector type. This is simply an i1 element type vector of the same
54+
/// (possibly scalable) length.
55+
static LLT getMaskTypeFor(LLT VecTy) {
56+
assert(VecTy.isVector());
57+
ElementCount EC = VecTy.getElementCount();
58+
return LLT::vector(EC, LLT::scalar(1));
59+
}
60+
61+
/// Creates an all ones mask suitable for masking a vector of type VecTy with
62+
/// vector length VL.
63+
static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
64+
MachineIRBuilder &MIB,
65+
MachineRegisterInfo &MRI) {
66+
LLT MaskTy = getMaskTypeFor(VecTy);
67+
return MIB.buildInstr(RISCV::G_VMSET_VL, {MaskTy}, {VL});
68+
}
69+
70+
/// Gets the two common "VL" operands: an all-ones mask and the vector length.
71+
/// VecTy is a scalable vector type.
72+
static std::pair<MachineInstrBuilder, Register>
73+
buildDefaultVLOps(const DstOp &Dst, MachineIRBuilder &MIB,
74+
MachineRegisterInfo &MRI) {
75+
LLT VecTy = Dst.getLLTTy(MRI);
76+
assert(VecTy.isScalableVector() && "Expecting scalable container type");
77+
Register VL(RISCV::X0);
78+
MachineInstrBuilder Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
79+
return {Mask, VL};
80+
}
81+
82+
/// Lowers G_INSERT_SUBVECTOR. We know we can lower it here since the legalizer
83+
/// marked it as legal.
84+
void lowerInsertSubvector(MachineInstr &MI, const RISCVSubtarget &STI) {
85+
GInsertSubvector &IS = cast<GInsertSubvector>(MI);
86+
87+
MachineIRBuilder MIB(MI);
88+
MachineRegisterInfo &MRI = *MIB.getMRI();
89+
90+
Register Dst = IS.getReg(0);
91+
Register Src1 = IS.getBigVec();
92+
Register Src2 = IS.getSubVec();
93+
uint64_t Idx = IS.getIndexImm();
94+
95+
LLT BigTy = MRI.getType(Src1);
96+
LLT LitTy = MRI.getType(Src2);
97+
Register BigVec = Src1;
98+
Register LitVec = Src2;
99+
100+
// We don't have the ability to slide mask vectors up indexed by their i1
101+
// elements; the smallest we can do is i8. Often we are able to bitcast to
102+
// equivalent i8 vectors. Otherwise, we can must zeroextend to equivalent i8
103+
// vectors and truncate down after the insert.
104+
if (LitTy.getElementType() == LLT::scalar(1) &&
105+
(Idx != 0 ||
106+
MRI.getVRegDef(BigVec)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)) {
107+
auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
108+
auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
109+
if (BigTyMinElts >= 8 && LitTyMinElts >= 8) {
110+
assert(Idx % 8 == 0 && "Invalid index");
111+
assert(BigTyMinElts % 8 == 0 && LitTyMinElts % 8 == 0 &&
112+
"Unexpected mask vector lowering");
113+
Idx /= 8;
114+
BigTy = LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8);
115+
LitTy = LLT::vector(LitTy.getElementCount().divideCoefficientBy(8), 8);
116+
BigVec = MIB.buildBitcast(BigTy, BigVec).getReg(0);
117+
LitVec = MIB.buildBitcast(LitTy, LitVec).getReg(0);
118+
} else {
119+
// We can't slide this mask vector up indexed by its i1 elements.
120+
// This poses a problem when we wish to insert a scalable vector which
121+
// can't be re-expressed as a larger type. Just choose the slow path and
122+
// extend to a larger type, then truncate back down.
123+
LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
124+
LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
125+
auto BigZExt = MIB.buildZExt(ExtBigTy, BigVec);
126+
auto LitZExt = MIB.buildZExt(ExtLitTy, LitVec);
127+
auto Insert = MIB.buildInsertSubvector(ExtBigTy, BigZExt, LitZExt, Idx);
128+
auto SplatZero = MIB.buildSplatVector(
129+
ExtBigTy, MIB.buildConstant(ExtBigTy.getElementType(), 0));
130+
MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, Insert, SplatZero);
131+
MI.eraseFromParent();
132+
return;
133+
}
134+
}
135+
136+
const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
137+
MVT LitTyMVT = getMVTForLLT(LitTy);
138+
unsigned SubRegIdx, RemIdx;
139+
std::tie(SubRegIdx, RemIdx) =
140+
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
141+
getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
142+
143+
RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(LitTy));
144+
bool IsSubVecPartReg = !RISCVVType::decodeVLMUL(SubVecLMUL).second;
145+
146+
// If the Idx has been completely eliminated and this subvector's size is a
147+
// vector register or a multiple thereof, or the surrounding elements are
148+
// undef, then this is a subvector insert which naturally aligns to a vector
149+
// register. These can easily be handled using subregister manipulation.
150+
if (RemIdx == 0 && (!IsSubVecPartReg || MRI.getVRegDef(Src1)->getOpcode() ==
151+
TargetOpcode::G_IMPLICIT_DEF))
152+
return;
153+
154+
// If the subvector is smaller than a vector register, then the insertion
155+
// must preserve the undisturbed elements of the register. We do this by
156+
// lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
157+
// (which resolves to a subregister copy), performing a VSLIDEUP to place the
158+
// subvector within the vector register, and an INSERT_SUBVECTOR of that
159+
// LMUL=1 type back into the larger vector (resolving to another subregister
160+
// operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
161+
// to avoid allocating a large register group to hold our subvector.
162+
163+
// VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
164+
// OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
165+
// (in our case undisturbed). This means we can set up a subvector insertion
166+
// where OFFSET is the insertion offset, and the VL is the OFFSET plus the
167+
// size of the subvector.
168+
const LLT XLenTy(STI.getXLenVT());
169+
LLT InterLitTy = BigTy;
170+
Register AlignedExtract = Src1;
171+
unsigned AlignedIdx = Idx - RemIdx;
172+
if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
173+
getLMUL1Ty(BigTy).getSizeInBits())) {
174+
InterLitTy = getLMUL1Ty(BigTy);
175+
// Extract a subvector equal to the nearest full vector register type. This
176+
// should resolve to a G_EXTRACT on a subreg.
177+
AlignedExtract =
178+
MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
179+
}
180+
181+
auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
182+
LitVec, 0);
183+
184+
auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
185+
auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());
186+
187+
// Use tail agnostic policy if we're inserting over InterLitTy's tail.
188+
ElementCount EndIndex =
189+
ElementCount::getScalable(RemIdx) + LitTy.getElementCount();
190+
uint64_t Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
191+
if (EndIndex == InterLitTy.getElementCount())
192+
Policy = RISCVII::TAIL_AGNOSTIC;
193+
194+
// If we're inserting into the lowest elements, use a tail undisturbed
195+
// vmv.v.v.
196+
MachineInstrBuilder Inserted;
197+
if (RemIdx == 0) {
198+
Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InterLitTy},
199+
{AlignedExtract, Insert, VL});
200+
} else {
201+
auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
202+
// Construct the vector length corresponding to RemIdx + length(LitTy).
203+
VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
204+
Inserted =
205+
MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InterLitTy},
206+
{AlignedExtract, LitVec, SlideupAmt, Mask, VL, Policy});
207+
}
208+
209+
// If required, insert this subvector back into the correct vector register.
210+
// This should resolve to an INSERT_SUBREG instruction.
211+
if (TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits()))
212+
Inserted = MIB.buildInsertSubvector(BigTy, BigVec, LitVec, AlignedIdx);
213+
214+
// We might have bitcast from a mask type: cast back to the original type if
215+
// required.
216+
MIB.buildBitcast(Dst, Inserted);
217+
218+
MI.eraseFromParent();
219+
return;
220+
}
221+
44222
class RISCVPostLegalizerLoweringImpl : public Combiner {
45223
protected:
46224
// TODO: Make CombinerHelper methods const.

llvm/lib/Target/RISCV/RISCVCombine.td

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,17 @@ def RISCVO0PreLegalizerCombiner: GICombiner<
1919
"RISCVO0PreLegalizerCombinerImpl", [optnone_combines]> {
2020
}
2121

22+
// Lower every legal G_INSERT_SUBVECTOR to target-specific RVV operations by
// calling the C++ helper lowerInsertSubvector() on the matched root.
def lower_insert_subvector : GICombineRule<
  (defs root:$root),
  (match (G_INSERT_SUBVECTOR $dst, $src1, $src2, $idx):$root),
  (apply [{ lowerInsertSubvector(*${root}, STI); }])
>;
27+
2228
// Post-legalization combines which should happen at all optimization levels.
2329
// (E.g. ones that facilitate matching for the selector) For example, matching
2430
// pseudos.
2531
def RISCVPostLegalizerLowering
26-
: GICombiner<"RISCVPostLegalizerLoweringImpl", []> {
32+
: GICombiner<"RISCVPostLegalizerLoweringImpl", [lower_insert_subvector]> {
2733
}
2834

2935
// Post-legalization combines which are primarily optimizations.

0 commit comments

Comments
 (0)