Skip to content

Commit 8731799

Browse files
committed
[Legalizer] Making artifact combining order-independent
Legalization algorithm is complicated by two facts: 1) While regular instructions should be possible to legalize in an isolated, per-instruction, context-free manner, legalization artifacts can only be eliminated in pairs, which could be deeply, and ultimately arbitrary nested: { [ () ] }, where which paranthesis kind depicts an artifact kind, like extend, unmerge, etc. Such structure can only be fully eliminated by simple local combines if they are attempted in a particular order (inside out), or alternatively by repeated scans each eliminating only one innermost pair, resulting in O(n^2) complexity. 2) Some artifacts might in fact be regular instructions that could (and sometimes should) be legalized by the target-specific rules. Which means failure to eliminate all artifacts on the first iteration is not a failure, they need to be tried as instructions, which may produce more artifacts, including the ones that are in fact regular instructions, resulting in a non-constant number of iterations required to finish the process. I trust the recently introduced termination condition (no new artifacts were created during as-a-regular-instruction-retrial of artifacts not eliminated on the previous iteration) to be efficient in providing termination, but only performing the legalization in full if and only if at each step such chains of artifacts are successfully eliminated in full as well. Which is currently not guaranteed, as the artifact combines are applied only once and in an arbitrary order that has to do with the order of creation or insertion of artifacts into their worklist, which is a no particular order. In this patch I make a small change to the artifact combiner, making it to re-insert into the worklist immediate (modulo a look-through copies) artifact users of each vreg that changes its definition due to an artifact combine. Here the first scan through the artifacts worklist, while not being done in any guaranteed order, only needs to find the innermost pair(s) of artifacts that could be immediately combined out. After that the process follows def-use chains, making them shorter at each step, thus combining everything that can be combined in O(n) time. Reviewers: volkan, aditya_nandakumar, qcolombet, paquette, aemerson, dsanders Reviewed By: aditya_nandakumar, paquette Tags: #llvm Differential Revision: https://reviews.llvm.org/D71448
1 parent 18bf967 commit 8731799

File tree

10 files changed

+348
-144
lines changed

10 files changed

+348
-144
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h

Lines changed: 106 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ class LegalizationArtifactCombiner {
4646
: Builder(B), MRI(MRI), LI(LI) {}
4747

4848
bool tryCombineAnyExt(MachineInstr &MI,
49-
SmallVectorImpl<MachineInstr *> &DeadInsts) {
49+
SmallVectorImpl<MachineInstr *> &DeadInsts,
50+
SmallVectorImpl<Register> &UpdatedDefs) {
5051
assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
5152

5253
Builder.setInstr(MI);
@@ -58,6 +59,7 @@ class LegalizationArtifactCombiner {
5859
if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
5960
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
6061
Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
62+
UpdatedDefs.push_back(DstReg);
6163
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
6264
return true;
6365
}
@@ -70,6 +72,7 @@ class LegalizationArtifactCombiner {
7072
m_GSExt(m_Reg(ExtSrc)),
7173
m_GZExt(m_Reg(ExtSrc)))))) {
7274
Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
75+
UpdatedDefs.push_back(DstReg);
7376
markInstAndDefDead(MI, *ExtMI, DeadInsts);
7477
return true;
7578
}
@@ -83,15 +86,17 @@ class LegalizationArtifactCombiner {
8386
auto &CstVal = SrcMI->getOperand(1);
8487
Builder.buildConstant(
8588
DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
89+
UpdatedDefs.push_back(DstReg);
8690
markInstAndDefDead(MI, *SrcMI, DeadInsts);
8791
return true;
8892
}
8993
}
90-
return tryFoldImplicitDef(MI, DeadInsts);
94+
return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
9195
}
9296

9397
bool tryCombineZExt(MachineInstr &MI,
94-
SmallVectorImpl<MachineInstr *> &DeadInsts) {
98+
SmallVectorImpl<MachineInstr *> &DeadInsts,
99+
SmallVectorImpl<Register> &UpdatedDefs) {
95100
assert(MI.getOpcode() == TargetOpcode::G_ZEXT);
96101

97102
Builder.setInstr(MI);
@@ -124,15 +129,17 @@ class LegalizationArtifactCombiner {
124129
auto &CstVal = SrcMI->getOperand(1);
125130
Builder.buildConstant(
126131
DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
132+
UpdatedDefs.push_back(DstReg);
127133
markInstAndDefDead(MI, *SrcMI, DeadInsts);
128134
return true;
129135
}
130136
}
131-
return tryFoldImplicitDef(MI, DeadInsts);
137+
return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
132138
}
133139

134140
bool tryCombineSExt(MachineInstr &MI,
135-
SmallVectorImpl<MachineInstr *> &DeadInsts) {
141+
SmallVectorImpl<MachineInstr *> &DeadInsts,
142+
SmallVectorImpl<Register> &UpdatedDefs) {
136143
assert(MI.getOpcode() == TargetOpcode::G_SEXT);
137144

138145
Builder.setInstr(MI);
@@ -154,11 +161,12 @@ class LegalizationArtifactCombiner {
154161
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
155162
return true;
156163
}
157-
return tryFoldImplicitDef(MI, DeadInsts);
164+
return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
158165
}
159166

160167
bool tryCombineTrunc(MachineInstr &MI,
161-
SmallVectorImpl<MachineInstr *> &DeadInsts) {
168+
SmallVectorImpl<MachineInstr *> &DeadInsts,
169+
SmallVectorImpl<Register> &UpdatedDefs) {
162170
assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
163171

164172
Builder.setInstr(MI);
@@ -174,6 +182,7 @@ class LegalizationArtifactCombiner {
174182
auto &CstVal = SrcMI->getOperand(1);
175183
Builder.buildConstant(
176184
DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits()));
185+
UpdatedDefs.push_back(DstReg);
177186
markInstAndDefDead(MI, *SrcMI, DeadInsts);
178187
return true;
179188
}
@@ -182,10 +191,10 @@ class LegalizationArtifactCombiner {
182191
return false;
183192
}
184193

185-
186194
/// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
187195
bool tryFoldImplicitDef(MachineInstr &MI,
188-
SmallVectorImpl<MachineInstr *> &DeadInsts) {
196+
SmallVectorImpl<MachineInstr *> &DeadInsts,
197+
SmallVectorImpl<Register> &UpdatedDefs) {
189198
unsigned Opcode = MI.getOpcode();
190199
assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
191200
Opcode == TargetOpcode::G_SEXT);
@@ -202,13 +211,15 @@ class LegalizationArtifactCombiner {
202211
return false;
203212
LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
204213
Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
214+
UpdatedDefs.push_back(DstReg);
205215
} else {
206216
// G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
207217
// bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
208218
if (isConstantUnsupported(DstTy))
209219
return false;
210220
LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
211221
Builder.buildConstant(DstReg, 0);
222+
UpdatedDefs.push_back(DstReg);
212223
}
213224

214225
markInstAndDefDead(MI, *DefMI, DeadInsts);
@@ -269,7 +280,8 @@ class LegalizationArtifactCombiner {
269280
}
270281

271282
bool tryCombineMerges(MachineInstr &MI,
272-
SmallVectorImpl<MachineInstr *> &DeadInsts) {
283+
SmallVectorImpl<MachineInstr *> &DeadInsts,
284+
SmallVectorImpl<Register> &UpdatedDefs) {
273285
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
274286

275287
unsigned NumDefs = MI.getNumOperands() - 1;
@@ -319,8 +331,8 @@ class LegalizationArtifactCombiner {
319331
SmallVector<Register, 2> TmpRegs;
320332
// This is a vector that is being scalarized and casted. Extract to
321333
// the element type, and do the conversion on the scalars.
322-
LLT MergeEltTy
323-
= MRI.getType(MergeI->getOperand(0).getReg()).getElementType();
334+
LLT MergeEltTy =
335+
MRI.getType(MergeI->getOperand(0).getReg()).getElementType();
324336
for (unsigned j = 0; j < NumMergeRegs; ++j)
325337
TmpRegs.push_back(MRI.createGenericVirtualRegister(MergeEltTy));
326338

@@ -331,6 +343,7 @@ class LegalizationArtifactCombiner {
331343
} else {
332344
Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
333345
}
346+
UpdatedDefs.append(DstRegs.begin(), DstRegs.end());
334347
}
335348

336349
} else if (NumMergeRegs > NumDefs) {
@@ -352,7 +365,9 @@ class LegalizationArtifactCombiner {
352365
++j, ++Idx)
353366
Regs.push_back(MergeI->getOperand(Idx).getReg());
354367

355-
Builder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs);
368+
Register DefReg = MI.getOperand(DefIdx).getReg();
369+
Builder.buildMerge(DefReg, Regs);
370+
UpdatedDefs.push_back(DefReg);
356371
}
357372

358373
} else {
@@ -366,8 +381,9 @@ class LegalizationArtifactCombiner {
366381

367382
for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
368383
Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();
369-
Builder.buildInstr(ConvertOp, {MI.getOperand(Idx).getReg()},
370-
{MergeSrc});
384+
Register DefReg = MI.getOperand(Idx).getReg();
385+
Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
386+
UpdatedDefs.push_back(DefReg);
371387
}
372388

373389
markInstAndDefDead(MI, *MergeI, DeadInsts);
@@ -378,9 +394,11 @@ class LegalizationArtifactCombiner {
378394
"Bitcast and the other kinds of conversions should "
379395
"have happened earlier");
380396

381-
for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
382-
MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
383-
MergeI->getOperand(Idx + 1).getReg());
397+
for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
398+
Register NewDef = MergeI->getOperand(Idx + 1).getReg();
399+
MRI.replaceRegWith(MI.getOperand(Idx).getReg(), NewDef);
400+
UpdatedDefs.push_back(NewDef);
401+
}
384402
}
385403

386404
markInstAndDefDead(MI, *MergeI, DeadInsts);
@@ -399,7 +417,8 @@ class LegalizationArtifactCombiner {
399417
}
400418

401419
bool tryCombineExtract(MachineInstr &MI,
402-
SmallVectorImpl<MachineInstr *> &DeadInsts) {
420+
SmallVectorImpl<MachineInstr *> &DeadInsts,
421+
SmallVectorImpl<Register> &UpdatedDefs) {
403422
assert(MI.getOpcode() == TargetOpcode::G_EXTRACT);
404423

405424
// Try to use the source registers from a G_MERGE_VALUES
@@ -414,13 +433,14 @@ class LegalizationArtifactCombiner {
414433
// for N >= %2.getSizeInBits() / 2
415434
// %3 = G_EXTRACT %1, (N - %0.getSizeInBits()
416435

417-
unsigned Src = lookThroughCopyInstrs(MI.getOperand(1).getReg());
418-
MachineInstr *MergeI = MRI.getVRegDef(Src);
436+
Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
437+
MachineInstr *MergeI = MRI.getVRegDef(SrcReg);
419438
if (!MergeI || !isMergeLikeOpcode(MergeI->getOpcode()))
420439
return false;
421440

422-
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
423-
LLT SrcTy = MRI.getType(Src);
441+
Register DstReg = MI.getOperand(0).getReg();
442+
LLT DstTy = MRI.getType(DstReg);
443+
LLT SrcTy = MRI.getType(SrcReg);
424444

425445
// TODO: Do we need to check if the resulting extract is supported?
426446
unsigned ExtractDstSize = DstTy.getSizeInBits();
@@ -438,10 +458,9 @@ class LegalizationArtifactCombiner {
438458

439459
// TODO: We could modify MI in place in most cases.
440460
Builder.setInstr(MI);
441-
Builder.buildExtract(
442-
MI.getOperand(0).getReg(),
443-
MergeI->getOperand(MergeSrcIdx + 1).getReg(),
444-
Offset - MergeSrcIdx * MergeSrcSize);
461+
Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(),
462+
Offset - MergeSrcIdx * MergeSrcSize);
463+
UpdatedDefs.push_back(DstReg);
445464
markInstAndDefDead(MI, *MergeI, DeadInsts);
446465
return true;
447466
}
@@ -458,33 +477,79 @@ class LegalizationArtifactCombiner {
458477
// etc, process the dead instructions now if any.
459478
if (!DeadInsts.empty())
460479
deleteMarkedDeadInsts(DeadInsts, WrapperObserver);
480+
481+
// Put here every vreg that was redefined in such a way that it's at least
482+
// possible that one (or more) of its users (immediate or COPY-separated)
483+
// could become artifact combinable with the new definition (or the
484+
// instruction reachable from it through a chain of copies if any).
485+
SmallVector<Register, 4> UpdatedDefs;
486+
bool Changed = false;
461487
switch (MI.getOpcode()) {
462488
default:
463489
return false;
464490
case TargetOpcode::G_ANYEXT:
465-
return tryCombineAnyExt(MI, DeadInsts);
491+
Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs);
492+
break;
466493
case TargetOpcode::G_ZEXT:
467-
return tryCombineZExt(MI, DeadInsts);
494+
Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs);
495+
break;
468496
case TargetOpcode::G_SEXT:
469-
return tryCombineSExt(MI, DeadInsts);
497+
Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs);
498+
break;
470499
case TargetOpcode::G_UNMERGE_VALUES:
471-
return tryCombineMerges(MI, DeadInsts);
500+
Changed = tryCombineMerges(MI, DeadInsts, UpdatedDefs);
501+
break;
472502
case TargetOpcode::G_EXTRACT:
473-
return tryCombineExtract(MI, DeadInsts);
474-
case TargetOpcode::G_TRUNC: {
475-
if (tryCombineTrunc(MI, DeadInsts))
476-
return true;
477-
478-
bool Changed = false;
479-
for (auto &Use : MRI.use_instructions(MI.getOperand(0).getReg()))
480-
Changed |= tryCombineInstruction(Use, DeadInsts, WrapperObserver);
481-
return Changed;
503+
Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs);
504+
break;
505+
case TargetOpcode::G_TRUNC:
506+
Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs);
507+
if (!Changed) {
508+
// Try to combine truncates away even if they are legal. As all artifact
509+
// combines at the moment look only "up" the def-use chains, we achieve
510+
// that by throwing truncates' users (with look through copies) into the
511+
// ArtifactList again.
512+
UpdatedDefs.push_back(MI.getOperand(0).getReg());
513+
}
514+
break;
482515
}
516+
// If the main loop through the ArtifactList found at least one combinable
517+
// pair of artifacts, not only combine it away (as done above), but also
518+
// follow the def-use chain from there to combine everything that can be
519+
// combined within this def-use chain of artifacts.
520+
while (!UpdatedDefs.empty()) {
521+
Register NewDef = UpdatedDefs.pop_back_val();
522+
assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg");
523+
for (MachineInstr &Use : MRI.use_instructions(NewDef)) {
524+
switch (Use.getOpcode()) {
525+
// Keep this list in sync with the list of all artifact combines.
526+
case TargetOpcode::G_ANYEXT:
527+
case TargetOpcode::G_ZEXT:
528+
case TargetOpcode::G_SEXT:
529+
case TargetOpcode::G_UNMERGE_VALUES:
530+
case TargetOpcode::G_EXTRACT:
531+
case TargetOpcode::G_TRUNC:
532+
// Adding Use to ArtifactList.
533+
WrapperObserver.changedInstr(Use);
534+
break;
535+
case TargetOpcode::COPY: {
536+
Register Copy = Use.getOperand(0).getReg();
537+
if (Copy.isVirtual())
538+
UpdatedDefs.push_back(Copy);
539+
break;
540+
}
541+
default:
542+
// If we do not have an artifact combine for the opcode, there is no
543+
// point in adding it to the ArtifactList as nothing interesting will
544+
// be done to it anyway.
545+
break;
546+
}
547+
}
483548
}
549+
return Changed;
484550
}
485551

486552
private:
487-
488553
static unsigned getArtifactSrcReg(const MachineInstr &MI) {
489554
switch (MI.getOpcode()) {
490555
case TargetOpcode::COPY:

llvm/lib/CodeGen/GlobalISel/Legalizer.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,11 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
212212
// ArtifactCombiner to combine away them.
213213
if (isArtifact(MI)) {
214214
LLVM_DEBUG(dbgs() << ".. Not legalized, moving to artifacts retry\n");
215+
assert(NumArtifacts == 0 &&
216+
"Artifacts are only expected in instruction list starting the "
217+
"second iteration, but each iteration starting second must "
218+
"start with an empty artifacts list");
219+
(void)NumArtifacts;
215220
RetryList.push_back(&MI);
216221
continue;
217222
}
@@ -224,7 +229,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
224229
// Try to combine the instructions in RetryList again if there
225230
// are new artifacts. If not, stop legalizing.
226231
if (!RetryList.empty()) {
227-
if (ArtifactList.size() > NumArtifacts) {
232+
if (!ArtifactList.empty()) {
228233
while (!RetryList.empty())
229234
ArtifactList.insert(RetryList.pop_back_val());
230235
} else {

llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -651,8 +651,9 @@ void MipsRegisterBankInfo::setRegBank(MachineInstr &MI,
651651
static void
652652
combineAwayG_UNMERGE_VALUES(LegalizationArtifactCombiner &ArtCombiner,
653653
MachineInstr &MI) {
654+
SmallVector<Register, 4> UpdatedDefs;
654655
SmallVector<MachineInstr *, 2> DeadInstrs;
655-
ArtCombiner.tryCombineMerges(MI, DeadInstrs);
656+
ArtCombiner.tryCombineMerges(MI, DeadInstrs, UpdatedDefs);
656657
for (MachineInstr *DeadMI : DeadInstrs)
657658
DeadMI->eraseFromParent();
658659
}

llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ define void @nonpow2_load_narrowing() {
167167
ret void
168168
}
169169

170-
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: %5:fpr32(s32) = G_EXTRACT %{{[0-9]+}}:fpr(s128), 64 (in function: nonpow2_store_narrowing)
170+
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: %14:gpr64(s64), %15:gpr(s1) = G_UADDE %17:gpr, %17:gpr, %13:gpr (in function: nonpow2_store_narrowing)
171171
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_store_narrowing
172172
; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_store_narrowing:
173173
define void @nonpow2_store_narrowing(i96* %c) {

llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@ body: |
1515
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
1616
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
1717
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[COPY1]]
18-
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32)
19-
; CHECK: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
20-
; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY2]](s8)
18+
; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32)
2119
; CHECK: $x0 = COPY [[ANYEXT]](s64)
2220
%0:_(s64) = G_CONSTANT i64 0
2321
%1:_(s4) = G_TRUNC %0

0 commit comments

Comments
 (0)