Skip to content

Commit ade4140

Browse files
committed
[X86][CodeGen] Support using NF instructions for flag copy lowering
1 parent 5988c79 commit ade4140

File tree

2 files changed

+449
-18
lines changed

2 files changed

+449
-18
lines changed

llvm/lib/Target/X86/X86FlagsCopyLowering.cpp

Lines changed: 110 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
6565
STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
6666
STATISTIC(NumTestsInserted, "Number of test instructions inserted");
6767
STATISTIC(NumAddsInserted, "Number of adds instructions inserted");
68+
STATISTIC(NumNFsConvertedTo, "Number of NF instructions converted to");
6869

6970
namespace {
7071

@@ -235,6 +236,27 @@ static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
235236
return NewMBB;
236237
}
237238

239+
/// Classifies how a single instruction affects EFLAGS, as computed by
/// getClobberType():
///  - NoClobber:         the instruction has no EFLAGS def operand.
///  - EvitableClobber:   the EFLAGS def is dead and the opcode has an NF
///                       (no flags update) variant, so the clobber can be
///                       avoided by switching to that variant.
///  - InevitableClobber: the instruction defines EFLAGS and does not meet the
///                       EvitableClobber conditions.
enum EFLAGSClobber { NoClobber, EvitableClobber, InevitableClobber };
240+
241+
#define GET_X86_NF_TRANSFORM_TABLE
242+
#include "X86GenInstrMapping.inc"
243+
static unsigned getNFVariant(unsigned Opc) {
244+
ArrayRef<X86TableEntry> Table = ArrayRef(X86NFTransformTable);
245+
const auto I = llvm::lower_bound(Table, Opc);
246+
return (I == Table.end() || I->OldOpc != Opc) ? 0U : I->NewOpc;
247+
}
248+
249+
/// Classify the effect of \p MI on EFLAGS.
///
/// \returns NoClobber if \p MI has no EFLAGS def operand; EvitableClobber if
/// that def is dead and the opcode has an NF variant; InevitableClobber
/// otherwise.
static EFLAGSClobber getClobberType(const MachineInstr &MI) {
  const MachineOperand *EFLAGSDef =
      MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
  if (EFLAGSDef == nullptr)
    return NoClobber;

  // Only consult the NF table when the flags result is actually unused.
  const bool CanUseNF =
      EFLAGSDef->isDead() && getNFVariant(MI.getOpcode()) != 0;
  return CanUseNF ? EvitableClobber : InevitableClobber;
}
259+
238260
bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
239261
LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
240262
<< " **********\n");
@@ -254,14 +276,100 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
254276
// turn copied again we visit the first one first. This ensures we can find
255277
// viable locations for testing the original EFLAGS that dominate all the
256278
// uses across complex CFGs.
257-
SmallVector<MachineInstr *, 4> Copies;
279+
SmallSetVector<MachineInstr *, 4> Copies;
258280
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
259281
for (MachineBasicBlock *MBB : RPOT)
260282
for (MachineInstr &MI : *MBB)
261283
if (MI.getOpcode() == TargetOpcode::COPY &&
262284
MI.getOperand(0).getReg() == X86::EFLAGS)
263-
Copies.push_back(&MI);
285+
Copies.insert(&MI);
286+
287+
// Try to eliminate the copies by transforming the instructions between copy and
288+
// copydef to the NF (no flags update) variants, e.g.
289+
//
290+
// %1:gr64 = COPY $eflags
291+
// OP1 implicit-def dead $eflags
292+
// $eflags = COPY %1
293+
// OP2 cc, implicit $eflags
294+
//
295+
// ->
296+
//
297+
// OP1_NF
298+
// OP2 implicit $eflags
299+
if (Subtarget->hasNF()) {
300+
SmallSetVector<MachineInstr *, 4> RemovedCopies;
301+
// CopyIIt may be invalidated by removing copies.
302+
auto CopyIIt = Copies.begin(), CopyIEnd = Copies.end();
303+
while (CopyIIt != CopyIEnd) {
304+
auto NCopyIIt = std::next(CopyIIt);
305+
SmallSetVector<MachineInstr *, 4> EvitableClobbers;
306+
MachineInstr *CopyI = *CopyIIt;
307+
MachineOperand &VOp = CopyI->getOperand(1);
308+
MachineInstr *CopyDefI = MRI->getVRegDef(VOp.getReg());
309+
MachineBasicBlock *CopyIMBB = CopyI->getParent();
310+
MachineBasicBlock *CopyDefIMBB = CopyDefI->getParent();
311+
// Walk all basic blocks reachable in depth-first iteration on the inverse
312+
// CFG from CopyIMBB to CopyDefIMBB. These blocks are all the blocks that
313+
// may be executed between the execution of CopyDefIMBB and CopyIMBB. On
314+
// all execution paths, instructions from CopyDefI to CopyI (exclusive)
315+
// have to be NF-convertible if they clobber flags.
316+
for (auto BI = idf_begin(CopyIMBB), BE = idf_end(CopyDefIMBB); BI != BE;
317+
++BI) {
318+
MachineBasicBlock *MBB = *BI;
319+
for (auto I = (MBB != CopyDefIMBB)
320+
? MBB->begin()
321+
: std::next(MachineBasicBlock::iterator(CopyDefI)),
322+
E = (MBB != CopyIMBB) ? MBB->end()
323+
: MachineBasicBlock::iterator(CopyI);
324+
I != E; ++I) {
325+
MachineInstr &MI = *I;
326+
EFLAGSClobber ClobberType = getClobberType(MI);
327+
if (ClobberType == NoClobber)
328+
continue;
329+
330+
if (ClobberType == InevitableClobber)
331+
goto ProcessNextCopyI;
332+
333+
assert(ClobberType == EvitableClobber && "unexpected workflow");
334+
EvitableClobbers.insert(&MI);
335+
}
336+
}
337+
// Convert evitable clobbers into NF variants and remove the copies.
338+
RemovedCopies.insert(CopyI);
339+
RemovedCopies.insert(CopyDefI);
340+
CopyI->eraseFromParent();
341+
CopyDefI->eraseFromParent();
342+
++NumCopiesEliminated;
343+
for (auto *Clobber : EvitableClobbers) {
344+
unsigned NewOpc = getNFVariant(Clobber->getOpcode());
345+
assert(NewOpc && "evitable clobber must have a NF variant");
346+
Clobber->setDesc(TII->get(NewOpc));
347+
Clobber->removeOperand(
348+
Clobber->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)
349+
->getOperandNo());
350+
++NumNFsConvertedTo;
351+
}
352+
ProcessNextCopyI:
353+
CopyIIt = NCopyIIt;
354+
}
355+
Copies.set_subtract(RemovedCopies);
356+
}
264357

358+
// For the remaining copies that cannot be eliminated by NF transform, we use
359+
// setcc to preserve the flags in GPR32 before OP1, and recheck its value
360+
// before using the flags, e.g.
361+
//
362+
// %1:gr64 = COPY $eflags
363+
// OP1 implicit-def dead $eflags
364+
// $eflags = COPY %1
365+
// OP2 cc, implicit $eflags
366+
//
367+
// ->
368+
//
369+
// %1:gr8 = SETCCr cc, implicit $eflags
370+
// OP1 implicit-def dead $eflags
371+
// TEST8rr %1, %1, implicit-def $eflags
372+
// OP2 ne, implicit $eflags
265373
for (MachineInstr *CopyI : Copies) {
266374
MachineBasicBlock &MBB = *CopyI->getParent();
267375

0 commit comments

Comments
 (0)