Skip to content

Commit d9d2516

Browse files
AMDGPU/GlobalISel: Rework legalization for extract/insert vector elt
Use G_MERGE_VALUES and G_UNMERGE_VALUES on vector elements instead of G_EXTRACT and G_INSERT when doing custom legalization for G_EXTRACT_VECTOR_ELT and G_INSERT_VECTOR_ELT. With this approach, the legalization artifact combiner gets direct access to all vector elements. Differential Revision: https://reviews.llvm.org/D116115
1 parent 003ac23 commit d9d2516

File tree

1 file changed

+16
-6
lines changed

1 file changed

+16
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2213,10 +2213,12 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
22132213
LLT EltTy = VecTy.getElementType();
22142214
assert(EltTy == MRI.getType(Dst));
22152215

2216-
if (IdxVal < VecTy.getNumElements())
2217-
B.buildExtract(Dst, Vec, IdxVal * EltTy.getSizeInBits());
2218-
else
2216+
if (IdxVal < VecTy.getNumElements()) {
2217+
auto Unmerge = B.buildUnmerge(EltTy, Vec);
2218+
B.buildCopy(Dst, Unmerge.getReg(IdxVal));
2219+
} else {
22192220
B.buildUndef(Dst);
2221+
}
22202222

22212223
MI.eraseFromParent();
22222224
return true;
@@ -2246,10 +2248,18 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
22462248
LLT EltTy = VecTy.getElementType();
22472249
assert(EltTy == MRI.getType(Ins));
22482250

2249-
if (IdxVal < VecTy.getNumElements())
2250-
B.buildInsert(Dst, Vec, Ins, IdxVal * EltTy.getSizeInBits());
2251-
else
2251+
unsigned NumElts = VecTy.getNumElements();
2252+
if (IdxVal < NumElts) {
2253+
SmallVector<Register, 8> SrcRegs;
2254+
for (unsigned i = 0; i < NumElts; ++i)
2255+
SrcRegs.push_back(MRI.createGenericVirtualRegister(EltTy));
2256+
B.buildUnmerge(SrcRegs, Vec);
2257+
2258+
SrcRegs[IdxVal] = MI.getOperand(2).getReg();
2259+
B.buildMerge(Dst, SrcRegs);
2260+
} else {
22522261
B.buildUndef(Dst);
2262+
}
22532263

22542264
MI.eraseFromParent();
22552265
return true;

0 commit comments

Comments
 (0)