@@ -65,6 +65,7 @@ STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
65
65
STATISTIC (NumSetCCsInserted, " Number of setCC instructions inserted" );
66
66
STATISTIC (NumTestsInserted, " Number of test instructions inserted" );
67
67
STATISTIC (NumAddsInserted, " Number of adds instructions inserted" );
68
+ STATISTIC (NumNFsConvertedTo, " Number of NF instructions converted to" );
68
69
69
70
namespace {
70
71
@@ -235,6 +236,27 @@ static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
235
236
return NewMBB;
236
237
}
237
238
239
// How a single instruction affects EFLAGS, as classified by getClobberType():
//   NoClobber         - the instruction has no EFLAGS def operand.
//   EvitableClobber   - the EFLAGS def is dead and getNFVariant() returns a
//                       non-zero opcode for the instruction.
//   InevitableClobber - every other instruction that defines EFLAGS.
+ enum EFLAGSClobber { NoClobber, EvitableClobber, InevitableClobber };
240
+
241
+ #define GET_X86_NF_TRANSFORM_TABLE
242
+ #include " X86GenInstrMapping.inc"
243
+ static unsigned getNFVariant (unsigned Opc) {
244
+ ArrayRef<X86TableEntry> Table = ArrayRef (X86NFTransformTable);
245
+ const auto I = llvm::lower_bound (Table, Opc);
246
+ return (I == Table.end () || I->OldOpc != Opc) ? 0U : I->NewOpc ;
247
+ }
248
+
249
+ static EFLAGSClobber getClobberType (const MachineInstr &MI) {
250
+ const MachineOperand *FlagDef =
251
+ MI.findRegisterDefOperand (X86::EFLAGS, /* TRI=*/ nullptr );
252
+ if (!FlagDef)
253
+ return NoClobber;
254
+ if (FlagDef->isDead () && getNFVariant (MI.getOpcode ()))
255
+ return EvitableClobber;
256
+
257
+ return InevitableClobber;
258
+ }
259
+
238
260
bool X86FlagsCopyLoweringPass::runOnMachineFunction (MachineFunction &MF) {
239
261
LLVM_DEBUG (dbgs () << " ********** " << getPassName () << " : " << MF.getName ()
240
262
<< " **********\n " );
@@ -254,14 +276,100 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
254
276
// turn copied again we visit the first one first. This ensures we can find
255
277
// viable locations for testing the original EFLAGS that dominate all the
256
278
// uses across complex CFGs.
257
- SmallVector <MachineInstr *, 4 > Copies;
279
+ SmallSetVector <MachineInstr *, 4 > Copies;
258
280
ReversePostOrderTraversal<MachineFunction *> RPOT (&MF);
259
281
for (MachineBasicBlock *MBB : RPOT)
260
282
for (MachineInstr &MI : *MBB)
261
283
if (MI.getOpcode () == TargetOpcode::COPY &&
262
284
MI.getOperand (0 ).getReg () == X86::EFLAGS)
263
- Copies.push_back (&MI);
285
+ Copies.insert (&MI);
286
+
287
+ // Try to eliminate the copies by transforming the instructions between copy and
288
+ // copydef to the NF (no flags update) variants, e.g.
289
+ //
290
+ // %1:gr64 = COPY $eflags
291
+ // OP1 implicit-def dead $eflags
292
+ // $eflags = COPY %1
293
+ // OP2 cc, implicit $eflags
294
+ //
295
+ // ->
296
+ //
297
+ // OP1_NF
298
+ // OP2 implicit $eflags
299
+ if (Subtarget->hasNF ()) {
300
+ SmallSetVector<MachineInstr *, 4 > RemovedCopies;
301
+ // CopyIIt may be invalidated by removing copies.
302
+ auto CopyIIt = Copies.begin (), CopyIEnd = Copies.end ();
303
+ while (CopyIIt != CopyIEnd) {
304
+ auto NCopyIIt = std::next (CopyIIt);
305
+ SmallSetVector<MachineInstr *, 4 > EvitableClobbers;
306
+ MachineInstr *CopyI = *CopyIIt;
307
+ MachineOperand &VOp = CopyI->getOperand (1 );
308
+ MachineInstr *CopyDefI = MRI->getVRegDef (VOp.getReg ());
309
+ MachineBasicBlock *CopyIMBB = CopyI->getParent ();
310
+ MachineBasicBlock *CopyDefIMBB = CopyDefI->getParent ();
311
+ // Walk all basic blocks reachable in depth-first iteration on the inverse
312
+ // CFG from CopyIMBB to CopyDefIMBB. These blocks are all the blocks that
313
+ // may be executed between the execution of CopyDefIMBB and CopyIMBB. On
314
+ // all execution paths, instructions from CopyDefI to CopyI (exclusive)
315
+ // have to be NF-convertible if they clobber flags.
316
+ for (auto BI = idf_begin (CopyIMBB), BE = idf_end (CopyDefIMBB); BI != BE;
317
+ ++BI) {
318
+ MachineBasicBlock *MBB = *BI;
319
+ for (auto I = (MBB != CopyDefIMBB)
320
+ ? MBB->begin ()
321
+ : std::next (MachineBasicBlock::iterator (CopyDefI)),
322
+ E = (MBB != CopyIMBB) ? MBB->end ()
323
+ : MachineBasicBlock::iterator (CopyI);
324
+ I != E; ++I) {
325
+ MachineInstr &MI = *I;
326
+ EFLAGSClobber ClobberType = getClobberType (MI);
327
+ if (ClobberType == NoClobber)
328
+ continue ;
329
+
330
+ if (ClobberType == InevitableClobber)
331
+ goto ProcessNextCopyI;
332
+
333
+ assert (ClobberType == EvitableClobber && " unexpected workflow" );
334
+ EvitableClobbers.insert (&MI);
335
+ }
336
+ }
337
+ // Convert evitable clobbers into NF variants and remove the copies.
338
+ RemovedCopies.insert (CopyI);
339
+ RemovedCopies.insert (CopyDefI);
340
+ CopyI->eraseFromParent ();
341
+ CopyDefI->eraseFromParent ();
342
+ ++NumCopiesEliminated;
343
+ for (auto *Clobber : EvitableClobbers) {
344
+ unsigned NewOpc = getNFVariant (Clobber->getOpcode ());
345
+ assert (NewOpc && " evitable clobber must have a NF variant" );
346
+ Clobber->setDesc (TII->get (NewOpc));
347
+ Clobber->removeOperand (
348
+ Clobber->findRegisterDefOperand (X86::EFLAGS, /* TRI=*/ nullptr )
349
+ ->getOperandNo ());
350
+ ++NumNFsConvertedTo;
351
+ }
352
+ ProcessNextCopyI:
353
+ CopyIIt = NCopyIIt;
354
+ }
355
+ Copies.set_subtract (RemovedCopies);
356
+ }
264
357
358
+ // For the rest of copies that cannot be eliminated by NF transform, we use
359
+ // setcc to preserve the flags in GPR32 before OP1, and recheck its value
360
+ // before using the flags, e.g.
361
+ //
362
+ // %1:gr64 = COPY $eflags
363
+ // OP1 implicit-def dead $eflags
364
+ // $eflags = COPY %1
365
+ // OP2 cc, implicit $eflags
366
+ //
367
+ // ->
368
+ //
369
+ // %1:gr8 = SETCCr cc, implicit $eflags
370
+ // OP1 implicit-def dead $eflags
371
+ // TEST8rr %1, %1, implicit-def $eflags
372
+ // OP2 ne, implicit $eflags
265
373
for (MachineInstr *CopyI : Copies) {
266
374
MachineBasicBlock &MBB = *CopyI->getParent ();
267
375
0 commit comments