10
10
#include " GCNSubtarget.h"
11
11
#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
12
12
#include " SIRegisterInfo.h"
13
+ #include " llvm/ADT/SmallVector.h"
13
14
#include " llvm/CodeGen/LivePhysRegs.h"
14
15
#include " llvm/CodeGen/MachineFunctionPass.h"
15
16
#include " llvm/CodeGen/MachineOperand.h"
@@ -32,6 +33,7 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
32
33
33
34
DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping;
34
35
SmallVector<std::pair<MachineInstr *, MachineInstr *>, 1 > OrXors;
36
+ SmallVector<MachineOperand *, 1 > KillFlagCandidates;
35
37
36
38
Register isCopyFromExec (const MachineInstr &MI) const ;
37
39
Register isCopyToExec (const MachineInstr &MI) const ;
@@ -41,15 +43,16 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
41
43
MachineBasicBlock::reverse_iterator
42
44
findExecCopy (MachineBasicBlock &MBB,
43
45
MachineBasicBlock::reverse_iterator I) const ;
44
-
45
46
bool isRegisterInUseBetween (MachineInstr &Stop, MachineInstr &Start,
46
47
MCRegister Reg, bool UseLiveOuts = false ,
47
48
bool IgnoreStart = false ) const ;
48
49
bool isRegisterInUseAfter (MachineInstr &Stop, MCRegister Reg) const ;
49
- MachineInstr *findInstrBackwards (MachineInstr &Origin,
50
- std::function<bool (MachineInstr *)> Pred,
51
- ArrayRef<MCRegister> NonModifiableRegs,
52
- unsigned MaxInstructions = 20) const ;
50
+ MachineInstr *findInstrBackwards (
51
+ MachineInstr &Origin, std::function<bool (MachineInstr *)> Pred,
52
+ ArrayRef<MCRegister> NonModifiableRegs,
53
+ MachineInstr *Terminator = nullptr,
54
+ SmallVectorImpl<MachineOperand *> *KillFlagCandidates = nullptr,
55
+ unsigned MaxInstructions = 20) const ;
53
56
bool optimizeExecSequence ();
54
57
void tryRecordVCmpxAndSaveexecSequence (MachineInstr &MI);
55
58
bool optimizeVCMPSaveExecSequence (MachineInstr &SaveExecInstr,
@@ -325,11 +328,13 @@ static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
325
328
// Backwards-iterate from Origin (for n=MaxInstructions iterations) until either
326
329
// the beginning of the BB is reached or Pred evaluates to true - which can be
327
330
// an arbitrary condition based on the current MachineInstr, for instance a
328
- // target instruction. Breaks prematurely by returning nullptr if one of the
331
+ // target instruction. Breaks prematurely by returning nullptr if one of the
329
332
// registers given in NonModifiableRegs is modified by the current instruction.
330
333
MachineInstr *SIOptimizeExecMasking::findInstrBackwards (
331
334
MachineInstr &Origin, std::function<bool (MachineInstr *)> Pred,
332
- ArrayRef<MCRegister> NonModifiableRegs, unsigned MaxInstructions) const {
335
+ ArrayRef<MCRegister> NonModifiableRegs, MachineInstr *Terminator,
336
+ SmallVectorImpl<MachineOperand *> *KillFlagCandidates,
337
+ unsigned MaxInstructions) const {
333
338
MachineBasicBlock::reverse_iterator A = Origin.getReverseIterator (),
334
339
E = Origin.getParent ()->rend ();
335
340
unsigned CurrentIteration = 0 ;
@@ -344,6 +349,21 @@ MachineInstr *SIOptimizeExecMasking::findInstrBackwards(
344
349
for (MCRegister Reg : NonModifiableRegs) {
345
350
if (A->modifiesRegister (Reg, TRI))
346
351
return nullptr ;
352
+
353
+ // Check for kills that appear after the terminator instruction, that
354
+ // would not be detected by clearKillFlags, since they will cause the
355
+ // register to be dead at a later place, causing the verifier to fail.
356
+ // We use the candidates to clear the kill flags later.
357
+ if (Terminator && KillFlagCandidates && A != Terminator &&
358
+ A->killsRegister (Reg, TRI)) {
359
+ for (MachineOperand &MO : A->operands ()) {
360
+ if (MO.isReg () && MO.isKill ()) {
361
+ Register Candidate = MO.getReg ();
362
+ if (Candidate != Reg && TRI->regsOverlap (Candidate, Reg))
363
+ KillFlagCandidates->push_back (&MO);
364
+ }
365
+ }
366
+ }
347
367
}
348
368
349
369
++CurrentIteration;
@@ -599,6 +619,9 @@ bool SIOptimizeExecMasking::optimizeVCMPSaveExecSequence(
599
619
if (Src1->isReg ())
600
620
MRI->clearKillFlags (Src1->getReg ());
601
621
622
+ for (MachineOperand *MO : KillFlagCandidates)
623
+ MO->setIsKill (false );
624
+
602
625
SaveExecInstr.eraseFromParent ();
603
626
VCmp.eraseFromParent ();
604
627
@@ -690,7 +713,8 @@ void SIOptimizeExecMasking::tryRecordVCmpxAndSaveexecSequence(
690
713
NonDefRegs.push_back (Src1->getReg ());
691
714
692
715
if (!findInstrBackwards (
693
- MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs))
716
+ MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs,
717
+ VCmp, &KillFlagCandidates))
694
718
return ;
695
719
696
720
if (VCmp)
@@ -777,6 +801,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
777
801
778
802
OrXors.clear ();
779
803
SaveExecVCmpMapping.clear ();
804
+ KillFlagCandidates.clear ();
780
805
static unsigned SearchWindow = 10 ;
781
806
for (MachineBasicBlock &MBB : MF) {
782
807
unsigned SearchCount = 0 ;
0 commit comments