Skip to content

Commit 901c5be

Browse files
authored
[AMDGPU] Fix GCNUpwardRPTracker: max register pressure on defs. (#74422)
Treat a defined register as fully live "at" the instruction and update maximum pressure accordingly. Fixes #3786.
1 parent cf47af4 commit 901c5be

File tree

3 files changed

+112
-135
lines changed

3 files changed

+112
-135
lines changed

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

Lines changed: 28 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -274,32 +274,42 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
274274
if (MI.isDebugInstr())
275275
return;
276276

277-
auto DecrementDef = [this](const MachineOperand &MO) {
277+
// Kill all defs.
278+
GCNRegPressure DefPressure, ECDefPressure;
279+
bool HasECDefs = false;
280+
for (const MachineOperand &MO : MI.all_defs()) {
281+
if (!MO.getReg().isVirtual())
282+
continue;
283+
278284
Register Reg = MO.getReg();
285+
LaneBitmask DefMask = getDefRegMask(MO, *MRI);
286+
287+
// Treat a def as fully live at the moment of definition: keep a record.
288+
if (MO.isEarlyClobber()) {
289+
ECDefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
290+
HasECDefs = true;
291+
} else
292+
DefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
293+
279294
auto I = LiveRegs.find(Reg);
280295
if (I == LiveRegs.end())
281-
return;
296+
continue;
282297

283298
LaneBitmask &LiveMask = I->second;
284299
LaneBitmask PrevMask = LiveMask;
285-
LiveMask &= ~getDefRegMask(MO, *MRI);
300+
LiveMask &= ~DefMask;
286301
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
287302
if (LiveMask.none())
288303
LiveRegs.erase(I);
289-
};
290-
291-
// Decrement non-early-clobber defs.
292-
SmallVector<const MachineOperand *, 2> EarlyClobberDefs;
293-
for (const MachineOperand &MO : MI.all_defs()) {
294-
if (!MO.getReg().isVirtual())
295-
continue;
296-
if (!MO.isEarlyClobber())
297-
DecrementDef(MO);
298-
else
299-
EarlyClobberDefs.push_back(&MO);
300304
}
301305

302-
// Increment uses.
306+
// Update MaxPressure with the pressure at the defs (all defs fully live).
307+
DefPressure += CurPressure;
308+
if (HasECDefs)
309+
DefPressure += ECDefPressure;
310+
MaxPressure = max(DefPressure, MaxPressure);
311+
312+
// Make uses alive.
303313
SmallVector<RegisterMaskPair, 8> RegUses;
304314
collectVirtualRegUses(RegUses, MI, LIS, *MRI);
305315
for (const RegisterMaskPair &U : RegUses) {
@@ -309,13 +319,9 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
309319
CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI);
310320
}
311321

312-
// Point of maximum pressure: non-early-clobber defs are decremented and uses
313-
// are incremented.
314-
MaxPressure = max(CurPressure, MaxPressure);
315-
316-
// Now decrement early clobber defs.
317-
for (const MachineOperand *MO : EarlyClobberDefs)
318-
DecrementDef(*MO);
322+
// Update MaxPressure with the pressure of the uses plus any early-clobber defs.
323+
MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure)
324+
: max(CurPressure, MaxPressure);
319325

320326
assert(CurPressure == getRegPressure(*MRI, LiveRegs));
321327
}

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,18 @@ struct GCNRegPressure {
8585
return !(*this == O);
8686
}
8787

88+
GCNRegPressure &operator+=(const GCNRegPressure &RHS) {
89+
for (unsigned I = 0; I < TOTAL_KINDS; ++I)
90+
Value[I] += RHS.Value[I];
91+
return *this;
92+
}
93+
94+
GCNRegPressure &operator-=(const GCNRegPressure &RHS) {
95+
for (unsigned I = 0; I < TOTAL_KINDS; ++I)
96+
Value[I] -= RHS.Value[I];
97+
return *this;
98+
}
99+
88100
void dump() const;
89101

90102
private:
@@ -105,6 +117,20 @@ inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) {
105117
return Res;
106118
}
107119

120+
inline GCNRegPressure operator+(const GCNRegPressure &P1,
121+
const GCNRegPressure &P2) {
122+
GCNRegPressure Sum = P1;
123+
Sum += P2;
124+
return Sum;
125+
}
126+
127+
inline GCNRegPressure operator-(const GCNRegPressure &P1,
128+
const GCNRegPressure &P2) {
129+
GCNRegPressure Diff = P1;
130+
Diff -= P2;
131+
return Diff;
132+
}
133+
108134
class GCNRPTracker {
109135
public:
110136
using LiveRegSet = DenseMap<unsigned, LaneBitmask>;

llvm/test/CodeGen/AMDGPU/regpressure_printer.mir

Lines changed: 58 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -47,87 +47,46 @@ body: |
4747
name: live_through_test
4848
tracksRegLiveness: true
4949
body: |
50-
; RPU-LABEL: name: live_through_test
51-
; RPU: bb.0:
52-
; RPU-NEXT: Live-in:
53-
; RPU-NEXT: SGPR VGPR
54-
; RPU-NEXT: 0 0
55-
; RPU-NEXT: 3 0 %0:sgpr_128 = IMPLICIT_DEF
56-
; RPU-NEXT: 3 0
57-
; RPU-NEXT: Live-out: %0:00000000000000F3
58-
; RPU-NEXT: Live-thr:
59-
; RPU-NEXT: 0 0
60-
; RPU-NEXT: bb.1:
61-
; RPU-NEXT: Live-in: %0:00000000000000F3
62-
; RPU-NEXT: SGPR VGPR
63-
; RPU-NEXT: 3 0
64-
; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
65-
; RPU-NEXT: 2 0
66-
; RPU-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
67-
; RPU-NEXT: 3 0
68-
; RPU-NEXT: 3 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
69-
; RPU-NEXT: 3 0
70-
; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
71-
; RPU-NEXT: 2 0
72-
; RPU-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
73-
; RPU-NEXT: 3 0
74-
; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
75-
; RPU-NEXT: 2 0
76-
; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
77-
; RPU-NEXT: 2 0
78-
; RPU-NEXT: Live-out: %0:00000000000000C3
79-
; RPU-NEXT: Live-thr: %0:00000000000000C0
80-
; RPU-NEXT: 1 0
81-
; RPU-NEXT: bb.2:
82-
; RPU-NEXT: Live-in: %0:00000000000000C3
83-
; RPU-NEXT: SGPR VGPR
84-
; RPU-NEXT: 2 0
85-
; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
86-
; RPU-NEXT: 0 0
87-
; RPU-NEXT: Live-out:
88-
; RPU-NEXT: Live-thr:
89-
; RPU-NEXT: 0 0
90-
;
91-
; RPD-LABEL: name: live_through_test
92-
; RPD: bb.0:
93-
; RPD-NEXT: Live-in:
94-
; RPD-NEXT: SGPR VGPR
95-
; RPD-NEXT: 0 0
96-
; RPD-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
97-
; RPD-NEXT: 3 0
98-
; RPD-NEXT: Live-out: %0:00000000000000F3
99-
; RPD-NEXT: Live-thr:
100-
; RPD-NEXT: 0 0
101-
; RPD-NEXT: bb.1:
102-
; RPD-NEXT: Live-in: %0:00000000000000F3
103-
; RPD-NEXT: SGPR VGPR
104-
; RPD-NEXT: 3 0
105-
; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
106-
; RPD-NEXT: 2 0
107-
; RPD-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
108-
; RPD-NEXT: 3 0
109-
; RPD-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
110-
; RPD-NEXT: 3 0
111-
; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
112-
; RPD-NEXT: 2 0
113-
; RPD-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
114-
; RPD-NEXT: 3 0
115-
; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
116-
; RPD-NEXT: 2 0
117-
; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
118-
; RPD-NEXT: 2 0
119-
; RPD-NEXT: Live-out: %0:00000000000000C3
120-
; RPD-NEXT: Live-thr: %0:00000000000000C0
121-
; RPD-NEXT: 1 0
122-
; RPD-NEXT: bb.2:
123-
; RPD-NEXT: Live-in: %0:00000000000000C3
124-
; RPD-NEXT: SGPR VGPR
125-
; RPD-NEXT: 2 0
126-
; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
127-
; RPD-NEXT: 0 0
128-
; RPD-NEXT: Live-out:
129-
; RPD-NEXT: Live-thr:
130-
; RPD-NEXT: 0 0
50+
; RP-LABEL: name: live_through_test
51+
; RP: bb.0:
52+
; RP-NEXT: Live-in:
53+
; RP-NEXT: SGPR VGPR
54+
; RP-NEXT: 0 0
55+
; RP-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
56+
; RP-NEXT: 3 0
57+
; RP-NEXT: Live-out: %0:00000000000000F3
58+
; RP-NEXT: Live-thr:
59+
; RP-NEXT: 0 0
60+
; RP-NEXT: bb.1:
61+
; RP-NEXT: Live-in: %0:00000000000000F3
62+
; RP-NEXT: SGPR VGPR
63+
; RP-NEXT: 3 0
64+
; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
65+
; RP-NEXT: 2 0
66+
; RP-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
67+
; RP-NEXT: 3 0
68+
; RP-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
69+
; RP-NEXT: 3 0
70+
; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
71+
; RP-NEXT: 2 0
72+
; RP-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
73+
; RP-NEXT: 3 0
74+
; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
75+
; RP-NEXT: 2 0
76+
; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
77+
; RP-NEXT: 2 0
78+
; RP-NEXT: Live-out: %0:00000000000000C3
79+
; RP-NEXT: Live-thr: %0:00000000000000C0
80+
; RP-NEXT: 1 0
81+
; RP-NEXT: bb.2:
82+
; RP-NEXT: Live-in: %0:00000000000000C3
83+
; RP-NEXT: SGPR VGPR
84+
; RP-NEXT: 2 0
85+
; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
86+
; RP-NEXT: 0 0
87+
; RP-NEXT: Live-out:
88+
; RP-NEXT: Live-thr:
89+
; RP-NEXT: 0 0
13190
bb.0:
13291
%0:sgpr_128 = IMPLICIT_DEF
13392
bb.1:
@@ -223,7 +182,7 @@ body: |
223182
; RPU-NEXT: 0 7
224183
; RPU-NEXT: 0 7 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
225184
; RPU-NEXT: 0 6
226-
; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
185+
; RPU-NEXT: 0 8 %8:vreg_64 = IMPLICIT_DEF
227186
; RPU-NEXT: 0 7
228187
; RPU-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
229188
; RPU-NEXT: 0 9
@@ -262,7 +221,7 @@ body: |
262221
; RPU-NEXT: 0 12
263222
; RPU-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
264223
; RPU-NEXT: 0 10
265-
; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
224+
; RPU-NEXT: 0 11 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
266225
; RPU-NEXT: 0 10
267226
; RPU-NEXT: 0 10 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
268227
; RPU-NEXT: 0 9
@@ -550,7 +509,7 @@ body: |
550509
; RPU-NEXT: 0 0
551510
; RPU-NEXT: 0 0 $sgpr0 = S_BUFFER_LOAD_DWORD_IMM $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0
552511
; RPU-NEXT: 0 0
553-
; RPU-NEXT: 0 0 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
512+
; RPU-NEXT: 0 1 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
554513
; RPU-NEXT: 0 0
555514
; RPU-NEXT: 0 0 S_CMP_GT_U32 $sgpr0, 15, implicit-def $scc
556515
; RPU-NEXT: 0 0
@@ -569,7 +528,7 @@ body: |
569528
; RPU-NEXT: 0 1
570529
; RPU-NEXT: 0 1 $m0 = S_MOV_B32 killed $sgpr0
571530
; RPU-NEXT: 0 1
572-
; RPU-NEXT: 0 1 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec
531+
; RPU-NEXT: 0 16 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec
573532
; RPU-NEXT: 0 1
574533
; RPU-NEXT: Live-out: %0:0000000000000C00
575534
; RPU-NEXT: Live-thr:
@@ -709,33 +668,19 @@ tracksRegLiveness: true
709668
body: |
710669
bb.0:
711670
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
712-
; RPU-LABEL: name: test_partially_used_early_clobber_def
713-
; RPU: Live-in:
714-
; RPU-NEXT: SGPR VGPR
715-
; RPU-NEXT: 0 0
716-
; RPU-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
717-
; RPU-NEXT: 4 0
718-
; RPU-NEXT: 5 0 early-clobber %1:sgpr_128 = COPY %0:sgpr_128
719-
; RPU-NEXT: 1 0
720-
; RPU-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128
721-
; RPU-NEXT: 0 0
722-
; RPU-NEXT: Live-out:
723-
; RPU-NEXT: Live-thr:
724-
; RPU-NEXT: 0 0
725-
;
726-
; RPD-LABEL: name: test_partially_used_early_clobber_def
727-
; RPD: Live-in:
728-
; RPD-NEXT: SGPR VGPR
729-
; RPD-NEXT: 0 0
730-
; RPD-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
731-
; RPD-NEXT: 4 0
732-
; RPD-NEXT: 8 0 early-clobber %1:sgpr_128 = COPY %0:sgpr_128
733-
; RPD-NEXT: 1 0
734-
; RPD-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128
735-
; RPD-NEXT: 0 0
736-
; RPD-NEXT: Live-out:
737-
; RPD-NEXT: Live-thr:
738-
; RPD-NEXT: 0 0
671+
; RP-LABEL: name: test_partially_used_early_clobber_def
672+
; RP: Live-in:
673+
; RP-NEXT: SGPR VGPR
674+
; RP-NEXT: 0 0
675+
; RP-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
676+
; RP-NEXT: 4 0
677+
; RP-NEXT: 8 0 early-clobber %1:sgpr_128 = COPY %0:sgpr_128
678+
; RP-NEXT: 1 0
679+
; RP-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128
680+
; RP-NEXT: 0 0
681+
; RP-NEXT: Live-out:
682+
; RP-NEXT: Live-thr:
683+
; RP-NEXT: 0 0
739684
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
740685
early-clobber %1:sgpr_128 = COPY %0:sgpr_128
741686
S_NOP 0, implicit %1.sub1
@@ -752,7 +697,7 @@ body: |
752697
; RPU-NEXT: 0 0
753698
; RPU-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
754699
; RPU-NEXT: 4 0
755-
; RPU-NEXT: 7 0 %1:sgpr_128 = COPY %0:sgpr_128, implicit-def %2:sgpr_128, implicit-def early-clobber %3:sgpr_128, implicit-def dead early-clobber %4:sgpr_128
700+
; RPU-NEXT: 16 0 %1:sgpr_128 = COPY %0:sgpr_128, implicit-def %2:sgpr_128, implicit-def early-clobber %3:sgpr_128, implicit-def dead early-clobber %4:sgpr_128
756701
; RPU-NEXT: 6 0
757702
; RPU-NEXT: 6 0 S_NOP 0, implicit %1.sub1:sgpr_128, implicit %2.sub0_sub1:sgpr_128, implicit %3.sub0_sub1_sub2:sgpr_128
758703
; RPU-NEXT: 0 0

0 commit comments

Comments
 (0)