Skip to content

[AMDGPU] Fix GCUpwardRPTracker. #74328

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 33 additions & 38 deletions llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,18 +183,13 @@ collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
}))
continue;

LaneBitmask UseMask;
auto &LI = LIS.getInterval(Reg);
if (!LI.hasSubRanges())
UseMask = MRI.getMaxLaneMaskForVReg(Reg);
else {
// For a tentative schedule LIS isn't updated yet but livemask should
// remain the same on any schedule. Subreg defs can be reordered but they
// all must dominate uses anyway.
if (!InstrSI)
InstrSI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
UseMask = getLiveLaneMask(LI, InstrSI, MRI);
}
if (!InstrSI)
InstrSI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();

// For a tentative schedule LIS isn't updated yet but livemask should
// remain the same on any schedule. Subreg defs can be reordered but they
// all must dominate uses anyway.
LaneBitmask UseMask = getLiveLaneMask(LIS.getInterval(Reg), InstrSI, MRI);

RegMaskPairs.emplace_back(Reg, UseMask);
}
Expand Down Expand Up @@ -274,48 +269,48 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
if (MI.isDebugInstr())
return;

auto DecrementDef = [this](const MachineOperand &MO) {
// Kill all defs.
GCNRegPressure DefPressure, ECDefPressure;
for (const MachineOperand &MO : MI.all_defs()) {
if (!MO.getReg().isVirtual())
continue;

Register Reg = MO.getReg();
LaneBitmask DefMask = getDefRegMask(MO, *MRI);

// Treat a def as fully live at the moment of definition: keep a record.
(MO.isEarlyClobber() ? &ECDefPressure : &DefPressure)
->inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);

auto I = LiveRegs.find(Reg);
if (I == LiveRegs.end())
return;
continue;

LaneBitmask &LiveMask = I->second;
LaneBitmask PrevMask = LiveMask;
LiveMask &= ~getDefRegMask(MO, *MRI);
LiveMask &= ~DefMask;
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
if (LiveMask.none())
LiveRegs.erase(I);
};

// Decrement non-early-clobber defs.
SmallVector<const MachineOperand *, 2> EarlyClobberDefs;
for (const MachineOperand &MO : MI.all_defs()) {
if (!MO.getReg().isVirtual())
continue;
if (!MO.isEarlyClobber())
DecrementDef(MO);
else
EarlyClobberDefs.push_back(&MO);
}

// Increment uses.
// Update MaxPressure with defs pressure.
MaxPressure = max(CurPressure + DefPressure + ECDefPressure, MaxPressure);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It probably worth to add ECDefPressure conditionally as early-clobbers are very rare (if any).


// Make uses alive.
SmallVector<RegisterMaskPair, 8> RegUses;
collectVirtualRegUses(RegUses, MI, LIS, *MRI);
for (const RegisterMaskPair &U : RegUses) {
LaneBitmask &LiveMask = LiveRegs[U.RegUnit];
for (auto [Reg, LaneMask] : RegUses) {
if (LaneMask.none())
continue;
LaneBitmask &LiveMask = LiveRegs[Reg];
LaneBitmask PrevMask = LiveMask;
LiveMask |= U.LaneMask;
CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI);
LiveMask |= LaneMask;
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
}

// Point of maximum pressure: non-early-clobber defs are decremented and uses
// are incremented.
MaxPressure = max(CurPressure, MaxPressure);

// Now decrement early clobber defs.
for (const MachineOperand *MO : EarlyClobberDefs)
DecrementDef(*MO);
// Update MaxPressure with all uses alive plus early-clobber defs pressure.
MaxPressure = max(CurPressure + ECDefPressure, MaxPressure);

assert(CurPressure == getRegPressure(*MRI, LiveRegs));
}
Expand Down
26 changes: 26 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNRegPressure.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,18 @@ struct GCNRegPressure {
return !(*this == O);
}

GCNRegPressure &operator+=(const GCNRegPressure &RHS) {
for (unsigned I = 0; I < TOTAL_KINDS; ++I)
Value[I] += RHS.Value[I];
return *this;
}

GCNRegPressure &operator-=(const GCNRegPressure &RHS) {
for (unsigned I = 0; I < TOTAL_KINDS; ++I)
Value[I] -= RHS.Value[I];
return *this;
}

void dump() const;

private:
Expand All @@ -105,6 +117,20 @@ inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) {
return Res;
}

inline GCNRegPressure operator+(const GCNRegPressure &P1,
const GCNRegPressure &P2) {
GCNRegPressure Sum = P1;
Sum += P2;
return Sum;
}

inline GCNRegPressure operator-(const GCNRegPressure &P1,
const GCNRegPressure &P2) {
GCNRegPressure Diff = P1;
Diff -= P2;
return Diff;
}

class GCNRPTracker {
public:
using LiveRegSet = DenseMap<unsigned, LaneBitmask>;
Expand Down
196 changes: 111 additions & 85 deletions llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
Original file line number Diff line number Diff line change
Expand Up @@ -47,87 +47,46 @@ body: |
name: live_through_test
tracksRegLiveness: true
body: |
; RPU-LABEL: name: live_through_test
; RPU: bb.0:
; RPU-NEXT: Live-in:
; RPU-NEXT: SGPR VGPR
; RPU-NEXT: 0 0
; RPU-NEXT: 3 0 %0:sgpr_128 = IMPLICIT_DEF
; RPU-NEXT: 3 0
; RPU-NEXT: Live-out: %0:00000000000000F3
; RPU-NEXT: Live-thr:
; RPU-NEXT: 0 0
; RPU-NEXT: bb.1:
; RPU-NEXT: Live-in: %0:00000000000000F3
; RPU-NEXT: SGPR VGPR
; RPU-NEXT: 3 0
; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
; RPU-NEXT: 2 0
; RPU-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
; RPU-NEXT: 3 0
; RPU-NEXT: 3 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
; RPU-NEXT: 3 0
; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
; RPU-NEXT: 2 0
; RPU-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
; RPU-NEXT: 3 0
; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
; RPU-NEXT: 2 0
; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
; RPU-NEXT: 2 0
; RPU-NEXT: Live-out: %0:00000000000000C3
; RPU-NEXT: Live-thr: %0:00000000000000C0
; RPU-NEXT: 1 0
; RPU-NEXT: bb.2:
; RPU-NEXT: Live-in: %0:00000000000000C3
; RPU-NEXT: SGPR VGPR
; RPU-NEXT: 2 0
; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
; RPU-NEXT: 0 0
; RPU-NEXT: Live-out:
; RPU-NEXT: Live-thr:
; RPU-NEXT: 0 0
;
; RPD-LABEL: name: live_through_test
; RPD: bb.0:
; RPD-NEXT: Live-in:
; RPD-NEXT: SGPR VGPR
; RPD-NEXT: 0 0
; RPD-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
; RPD-NEXT: 3 0
; RPD-NEXT: Live-out: %0:00000000000000F3
; RPD-NEXT: Live-thr:
; RPD-NEXT: 0 0
; RPD-NEXT: bb.1:
; RPD-NEXT: Live-in: %0:00000000000000F3
; RPD-NEXT: SGPR VGPR
; RPD-NEXT: 3 0
; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
; RPD-NEXT: 2 0
; RPD-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
; RPD-NEXT: 3 0
; RPD-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
; RPD-NEXT: 3 0
; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
; RPD-NEXT: 2 0
; RPD-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
; RPD-NEXT: 3 0
; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
; RPD-NEXT: 2 0
; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
; RPD-NEXT: 2 0
; RPD-NEXT: Live-out: %0:00000000000000C3
; RPD-NEXT: Live-thr: %0:00000000000000C0
; RPD-NEXT: 1 0
; RPD-NEXT: bb.2:
; RPD-NEXT: Live-in: %0:00000000000000C3
; RPD-NEXT: SGPR VGPR
; RPD-NEXT: 2 0
; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
; RPD-NEXT: 0 0
; RPD-NEXT: Live-out:
; RPD-NEXT: Live-thr:
; RPD-NEXT: 0 0
; RP-LABEL: name: live_through_test
; RP: bb.0:
; RP-NEXT: Live-in:
; RP-NEXT: SGPR VGPR
; RP-NEXT: 0 0
; RP-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
; RP-NEXT: 3 0
; RP-NEXT: Live-out: %0:00000000000000F3
; RP-NEXT: Live-thr:
; RP-NEXT: 0 0
; RP-NEXT: bb.1:
; RP-NEXT: Live-in: %0:00000000000000F3
; RP-NEXT: SGPR VGPR
; RP-NEXT: 3 0
; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
; RP-NEXT: 2 0
; RP-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
; RP-NEXT: 3 0
; RP-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
; RP-NEXT: 3 0
; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
; RP-NEXT: 2 0
; RP-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
; RP-NEXT: 3 0
; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
; RP-NEXT: 2 0
; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
; RP-NEXT: 2 0
; RP-NEXT: Live-out: %0:00000000000000C3
; RP-NEXT: Live-thr: %0:00000000000000C0
; RP-NEXT: 1 0
; RP-NEXT: bb.2:
; RP-NEXT: Live-in: %0:00000000000000C3
; RP-NEXT: SGPR VGPR
; RP-NEXT: 2 0
; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
; RP-NEXT: 0 0
; RP-NEXT: Live-out:
; RP-NEXT: Live-thr:
; RP-NEXT: 0 0
bb.0:
%0:sgpr_128 = IMPLICIT_DEF
bb.1:
Expand Down Expand Up @@ -223,7 +182,7 @@ body: |
; RPU-NEXT: 0 7
; RPU-NEXT: 0 7 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 6
; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
; RPU-NEXT: 0 8 %8:vreg_64 = IMPLICIT_DEF
; RPU-NEXT: 0 7
; RPU-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
; RPU-NEXT: 0 9
Expand Down Expand Up @@ -262,7 +221,7 @@ body: |
; RPU-NEXT: 0 12
; RPU-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 10
; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 11 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 10
; RPU-NEXT: 0 10 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
; RPU-NEXT: 0 9
Expand Down Expand Up @@ -550,7 +509,7 @@ body: |
; RPU-NEXT: 0 0
; RPU-NEXT: 0 0 $sgpr0 = S_BUFFER_LOAD_DWORD_IMM $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0
; RPU-NEXT: 0 0
; RPU-NEXT: 0 0 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
; RPU-NEXT: 0 1 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
; RPU-NEXT: 0 0
; RPU-NEXT: 0 0 S_CMP_GT_U32 $sgpr0, 15, implicit-def $scc
; RPU-NEXT: 0 0
Expand All @@ -569,7 +528,7 @@ body: |
; RPU-NEXT: 0 1
; RPU-NEXT: 0 1 $m0 = S_MOV_B32 killed $sgpr0
; RPU-NEXT: 0 1
; RPU-NEXT: 0 1 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec
; RPU-NEXT: 0 16 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec
; RPU-NEXT: 0 1
; RPU-NEXT: Live-out: %0:0000000000000C00
; RPU-NEXT: Live-thr:
Expand Down Expand Up @@ -666,3 +625,70 @@ body: |
EXP_DONE 0, %49:vgpr_32, undef %51:vgpr_32, undef %53:vgpr_32, undef %55:vgpr_32, -1, 0, 1, implicit $exec
S_ENDPGM 0
...
---
name: early_clobber_def_used_on_rhs
registers:
- { id: 0, class: vgpr_32 }
body: |
; RPU-LABEL: name: early_clobber_def_used_on_rhs
; RPU: bb.0:
; RPU-NEXT: Live-in:
; RPU-NEXT: SGPR VGPR
; RPU-NEXT: 0 0
; RPU-NEXT: 0 1 dead %3:vgpr_32 = COPY $vgpr0
; RPU-NEXT: 0 0
; RPU-NEXT: 0 1 early-clobber %2:vgpr_32 = COPY %0:vgpr_32
Comment on lines +638 to +640
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where did %2 and %3 come from? The input only has %0. Anyway this PR seems to be failing its own tests, at least with assertions enabled.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mir parser renames those registers, I haven't managed to prevent it doing that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually this isn't MIR parser, this is done by LiveIntervals analysis.

Firstly, I forgot to add tracksRegLiveness: true line in the test - LiveIntervals requires that. But even with the flag, the verifier fails like this:

********** INTERVALS **********
VGPR0_LO16 [0B,16r:0) 0@0B-phi
VGPR0_HI16 [0B,16r:0) 0@0B-phi
%0 [32e,48r:0) 0@32e  weight:0.000000e+00
%1 [16r,16d:0) 0@16r  weight:0.000000e+00
RegMasks:
********** MACHINEINSTRS **********
# Machine code for function early_clobber_def_used_on_rhs: NoPHIs, TracksLiveness

0B	bb.0:
	  liveins: $vgpr0
16B	  dead %1:vgpr_32 = COPY $vgpr0
32B	  early-clobber %0:vgpr_32 = COPY %0:vgpr_32
48B	  S_NOP 0, implicit %0:vgpr_32

# End machine code for function early_clobber_def_used_on_rhs.

*** Bad machine code: No live segment at use ***
- function:    early_clobber_def_used_on_rhs
- basic block: %bb.0  (0x1f9fb80) [0B;64B)
- instruction: 32B	early-clobber %0:vgpr_32 = COPY %0:vgpr_32
- operand 1:   %0:vgpr_32
- liverange:   [32e,48r:0) 0@32e
- v. register: %0
- at:          32B

Verifier complains that there is no live segment at 32B slot index, that is at the entry to 32 instruction. LiveIntervals correctly determined that the value produced at 16R is dead because it's clobbered by the def at 32E before the use at 32R. It renamed %0 at 16R to %1 and marked it as dead but I think it should also mark the %0 use at 32R as 'undef' for this IR to be valid. If I add 'undef' flag there manually everything works fine:

bb.0:
    Live-in: 
    SGPR  VGPR
    0     0    
    0     1      dead %1:vgpr_32 = COPY $vgpr0
    0     0    
    0     1      early-clobber %0:vgpr_32 = COPY undef %0:vgpr_32
    0     1    
    0     1      S_NOP 0, implicit %0:vgpr_32
    0     0    

So I think current implementation of GCNUpwardRPTracker is correct if it works on correct IR. LiveIntervals should be fixed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the other hand MachineFunctionProperties::Property::TracksLiveness property that is required by LiveIntervals has the following description, include/llvm/CodeGen/MachineFunction.h:143:

  // TracksLiveness: True when tracking register liveness accurately.
  //  While this property is set, register liveness information in basic block
  //  live-in lists and machine instruction operands (e.g. implicit defs) is
  //  accurate, kill flags are conservatively accurate (kill flag correctly
  //  indicates the last use of a register, an operand without kill flag may or
  //  may not be the last use of a register). This means it can be used to
  //  change the code in ways that affect the values in registers, for example
  //  by the register scavenger.

This may mean that 'undef' flag should have been set manually in the testcase to be valid so I'm not sure. Maybe I should just add a test with 'undef' flag and get away with it.

; RPU-NEXT: 0 1
; RPU-NEXT: 0 1 S_NOP 0, implicit %2:vgpr_32
; RPU-NEXT: 0 0
; RPU-NEXT: Live-out:
; RPU-NEXT: Live-thr:
; RPU-NEXT: 0 0
; RPU-NEXT: bb.1:
; RPU-NEXT: Live-in:
; RPU-NEXT: SGPR VGPR
; RPU-NEXT: 0 0
; RPU-NEXT: 0 1 dead %1:vgpr_32 = COPY $vgpr0
; RPU-NEXT: 0 0
; RPU-NEXT: 0 1 dead %0:vgpr_32 = COPY $vgpr0
; RPU-NEXT: 0 0
; RPU-NEXT: Live-out:
; RPU-NEXT: Live-thr:
; RPU-NEXT: 0 0
;
; RPD-LABEL: name: early_clobber_def_used_on_rhs
; RPD: bb.0:
; RPD-NEXT: Live-in:
; RPD-NEXT: SGPR VGPR
; RPD-NEXT: 0 0
; RPD-NEXT: 0 1 dead %3:vgpr_32 = COPY $vgpr0
; RPD-NEXT: 0 0
; RPD-NEXT: 0 1 early-clobber %2:vgpr_32 = COPY %0:vgpr_32
; RPD-NEXT: 0 0
; RPD-NEXT: 0 0 S_NOP 0, implicit %2:vgpr_32
; RPD-NEXT: 0 -1
; RPD-NEXT: Live-out:
; RPD-NEXT: mis LIS:
; RPD-NEXT: Live-thr:
; RPD-NEXT: 0 0
; RPD-NEXT: bb.1:
; RPD-NEXT: Live-in:
; RPD-NEXT: SGPR VGPR
; RPD-NEXT: 0 0
; RPD-NEXT: 0 1 dead %1:vgpr_32 = COPY $vgpr0
; RPD-NEXT: 0 0
; RPD-NEXT: 0 1 dead %0:vgpr_32 = COPY $vgpr0
; RPD-NEXT: 0 0
; RPD-NEXT: Live-out:
; RPD-NEXT: Live-thr:
; RPD-NEXT: 0 0
bb.0:
liveins: $vgpr0
%0 = COPY $vgpr0
early-clobber %0 = COPY %0
S_NOP 0, implicit %0
bb.1:
liveins: $vgpr0
%0 = COPY $vgpr0
%0 = COPY $vgpr0 ; Force isSSA = false
...