Skip to content

Commit 549f6a8

Browse files
committed
[MachineCopyPropagation] Check CrossCopyRegClass for cross-class copies
On some AMDGPU subtargets, copying to and from AGPR registers using another AGPR register is not possible. An intermediate VGPR register is needed for an AGPR to AGPR copy. This is an issue when machine copy propagation forwards a COPY $agpr, replacing a COPY $vgpr, which results in $agpr = COPY $agpr. It is removing a cross-class copy that may have been optimized by previous passes and potentially creating an unoptimized cross-class copy later on.

To avoid this issue, check CrossCopyRegClass to see if a different register class will be needed for the copy. If so, then avoid forwarding the copy when the destination does not match the desired register class and the original copy already matches the desired register class.

Issue seen while attempting to optimize another AGPR to AGPR issue:

Live-ins: $agpr0
$vgpr0 = COPY $agpr0
$agpr1 = V_ACCVGPR_WRITE_B32 $vgpr0
$agpr2 = COPY $vgpr0
$agpr3 = COPY $vgpr0
$agpr4 = COPY $vgpr0

After machine-cp:

$vgpr0 = COPY $agpr0
$agpr1 = V_ACCVGPR_WRITE_B32 $vgpr0
$agpr2 = COPY $agpr0
$agpr3 = COPY $agpr0
$agpr4 = COPY $agpr0

Machine-cp propagated COPY $agpr0 to replace $vgpr0, creating 3 AGPR to AGPR copies. Later this creates a cross-register copy from AGPR->VGPR->AGPR for each copy when the prior VGPR->AGPR copy was already optimal.

Reviewed By: lkail, rampitec

Differential Revision: https://reviews.llvm.org/D108011
1 parent 2a35d59 commit 549f6a8

File tree

4 files changed

+110
-3
lines changed

4 files changed

+110
-3
lines changed

llvm/lib/CodeGen/MachineCopyPropagation.cpp

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,31 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
414414
if (!UseI.isCopy())
415415
return false;
416416

417+
const TargetRegisterClass *CopySrcRC =
418+
TRI->getMinimalPhysRegClass(CopySrcReg);
419+
const TargetRegisterClass *UseDstRC =
420+
TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
421+
const TargetRegisterClass *CrossCopyRC = TRI->getCrossCopyRegClass(CopySrcRC);
422+
423+
// If cross copy register class is not the same as copy source register class
424+
// then it is not possible to copy the register directly and requires a cross
425+
// register class copy. Forwarding this copy without checking register class of
426+
// UseDst may create additional cross register copies when expanding the copy
427+
// instruction in later passes.
428+
if (CopySrcRC != CrossCopyRC) {
429+
const TargetRegisterClass *CopyDstRC =
430+
TRI->getMinimalPhysRegClass(Copy.getOperand(0).getReg());
431+
432+
// Check if UseDstRC matches the necessary register class to copy from
433+
// CopySrc's register class. If so then forwarding the copy will not
434+
// introduce any cross-class copies. Else if CopyDstRC matches then keep the
435+
// copy and do not forward. If neither UseDstRC nor CopyDstRC matches then
436+
// we may need a cross register copy later but we do not worry about it
437+
// here.
438+
if (UseDstRC != CrossCopyRC && CopyDstRC == CrossCopyRC)
439+
return false;
440+
}
441+
417442
/// COPYs don't have register class constraints, so if the user instruction
418443
/// is a COPY, we just try to avoid introducing additional cross-class
419444
/// COPYs. For example:
@@ -430,9 +455,6 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
430455
///
431456
/// so we have reduced the number of cross-class COPYs and potentially
432457
/// introduced a nop COPY that can be removed.
433-
const TargetRegisterClass *UseDstRC =
434-
TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
435-
436458
const TargetRegisterClass *SuperRC = UseDstRC;
437459
for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses();
438460
SuperRC; SuperRC = *SuperRCI++)

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -801,6 +801,14 @@ const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
801801
return &AMDGPU::VGPR_32RegClass;
802802
}
803803

804+
const TargetRegisterClass *
805+
SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
806+
if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
807+
return getEquivalentVGPRClass(RC);
808+
809+
return RC;
810+
}
811+
804812
static unsigned getNumSubRegsForSpillOp(unsigned Op) {
805813

806814
switch (Op) {

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,13 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
108108
const TargetRegisterClass *getPointerRegClass(
109109
const MachineFunction &MF, unsigned Kind = 0) const override;
110110

111+
/// Returns a legal register class to copy a register in the specified class
112+
/// to or from. If it is possible to copy the register directly without using
113+
/// a cross register class copy, return the specified RC. Returns NULL if it
114+
/// is not possible to copy between two registers of the specified class.
115+
const TargetRegisterClass *
116+
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
117+
111118
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset,
112119
bool IsLoad, bool IsKill = true) const;
113120

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -march=amdgcn -mcpu=gfx908 %s -o - -run-pass machine-cp -verify-machineinstrs | FileCheck -check-prefix=GFX908 %s
3+
# RUN: llc -march=amdgcn -mcpu=gfx90a %s -o - -run-pass machine-cp -verify-machineinstrs | FileCheck -check-prefix=GFX90A %s
4+
5+
---
6+
name: do_not_propagate_agpr_to_agpr
7+
body: |
8+
bb.0:
9+
successors:
10+
liveins: $agpr0
11+
12+
; GFX908-LABEL: name: do_not_propagate_agpr_to_agpr
13+
; GFX908: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec
14+
; GFX908: renamable $agpr1 = COPY renamable $vgpr0, implicit $exec
15+
; GFX908: renamable $agpr2 = COPY renamable $vgpr0, implicit $exec
16+
; GFX908: S_ENDPGM 0, implicit $vgpr0, implicit $agpr1, implicit $agpr2
17+
; GFX90A-LABEL: name: do_not_propagate_agpr_to_agpr
18+
; GFX90A: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec
19+
; GFX90A: renamable $agpr1 = COPY $agpr0, implicit $exec
20+
; GFX90A: renamable $agpr2 = COPY $agpr0, implicit $exec
21+
; GFX90A: S_ENDPGM 0, implicit $vgpr0, implicit $agpr1, implicit $agpr2
22+
renamable $vgpr0 = COPY renamable $agpr0, implicit $exec
23+
renamable $agpr1 = COPY renamable $vgpr0, implicit $exec
24+
renamable $agpr2 = COPY renamable $vgpr0, implicit $exec
25+
S_ENDPGM 0, implicit $vgpr0, implicit $agpr1, implicit $agpr2
26+
...
27+
---
28+
name: propagate_vgpr_to_agpr
29+
body: |
30+
bb.0:
31+
successors:
32+
liveins: $vgpr0
33+
34+
; GFX908-LABEL: name: propagate_vgpr_to_agpr
35+
; GFX908: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
36+
; GFX908: renamable $agpr1 = COPY $vgpr0, implicit $exec
37+
; GFX908: renamable $agpr2 = COPY $vgpr0, implicit $exec
38+
; GFX908: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2
39+
; GFX90A-LABEL: name: propagate_vgpr_to_agpr
40+
; GFX90A: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
41+
; GFX90A: renamable $agpr1 = COPY $vgpr0, implicit $exec
42+
; GFX90A: renamable $agpr2 = COPY $vgpr0, implicit $exec
43+
; GFX90A: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2
44+
renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
45+
renamable $agpr1 = COPY renamable $agpr0, implicit $exec
46+
renamable $agpr2 = COPY renamable $agpr0, implicit $exec
47+
S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2
48+
...
49+
---
50+
name: propagate_agpr_to_vgpr
51+
body: |
52+
bb.0:
53+
successors:
54+
liveins: $agpr0
55+
56+
; GFX908-LABEL: name: propagate_agpr_to_vgpr
57+
; GFX908: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec
58+
; GFX908: renamable $vgpr1 = COPY $agpr0, implicit $exec
59+
; GFX908: renamable $vgpr2 = COPY $agpr0, implicit $exec
60+
; GFX908: S_ENDPGM 0, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
61+
; GFX90A-LABEL: name: propagate_agpr_to_vgpr
62+
; GFX90A: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec
63+
; GFX90A: renamable $vgpr1 = COPY $agpr0, implicit $exec
64+
; GFX90A: renamable $vgpr2 = COPY $agpr0, implicit $exec
65+
; GFX90A: S_ENDPGM 0, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
66+
renamable $vgpr0 = COPY renamable $agpr0, implicit $exec
67+
renamable $vgpr1 = COPY renamable $vgpr0, implicit $exec
68+
renamable $vgpr2 = COPY renamable $vgpr0, implicit $exec
69+
S_ENDPGM 0, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
70+
...

0 commit comments

Comments
 (0)