Skip to content

Commit 5fa2f79

Browse files
committed
Remove 16bit COPY pre-RA hint, update tests
1 parent 90d554e commit 5fa2f79

File tree

12 files changed: 1221 additions and 708 deletions.

llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp

Lines changed: 0 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -22,18 +22,11 @@
2222
/// although the same shall be possible with other register classes and
2323
/// instructions if necessary.
2424
///
25-
/// This pass also adds register allocation hints to COPY.
26-
/// The hints will be post-processed by SIRegisterInfo::getRegAllocationHints.
27-
/// When using True16, we often see COPY moving a 16-bit value between a VGPR_32
28-
/// and a VGPR_16. If we use the VGPR_16 that corresponds to the lo16 bits of
29-
/// the VGPR_32, the COPY can be completely eliminated.
30-
3125
//===----------------------------------------------------------------------===//
3226

3327
#include "AMDGPU.h"
3428
#include "GCNSubtarget.h"
3529
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
36-
#include "SIRegisterInfo.h"
3730
#include "llvm/CodeGen/LiveIntervals.h"
3831
#include "llvm/CodeGen/MachineFunctionPass.h"
3932
#include "llvm/InitializePasses.h"
@@ -243,38 +236,5 @@ bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
243236
Changed |= processReg(Reg);
244237
}
245238

246-
if (!ST.useRealTrue16Insts())
247-
return Changed;
248-
249-
// Add RA hints to improve True16 COPY elimination.
250-
for (const MachineBasicBlock &MBB : MF) {
251-
for (const MachineInstr &MI : MBB) {
252-
if (MI.getOpcode() != AMDGPU::COPY)
253-
continue;
254-
Register Dst = MI.getOperand(0).getReg();
255-
Register Src = MI.getOperand(1).getReg();
256-
if (Dst.isVirtual() &&
257-
MRI->getRegClass(Dst) == &AMDGPU::VGPR_16RegClass &&
258-
Src.isPhysical() &&
259-
TRI->getRegClassForReg(*MRI, Src) == &AMDGPU::VGPR_32RegClass)
260-
MRI->setRegAllocationHint(Dst, 0, TRI->getSubReg(Src, AMDGPU::lo16));
261-
if (Src.isVirtual() &&
262-
MRI->getRegClass(Src) == &AMDGPU::VGPR_16RegClass &&
263-
Dst.isPhysical() &&
264-
TRI->getRegClassForReg(*MRI, Dst) == &AMDGPU::VGPR_32RegClass)
265-
MRI->setRegAllocationHint(Src, 0, TRI->getSubReg(Dst, AMDGPU::lo16));
266-
if (!Dst.isVirtual() || !Src.isVirtual())
267-
continue;
268-
if (MRI->getRegClass(Dst) == &AMDGPU::VGPR_32RegClass &&
269-
MRI->getRegClass(Src) == &AMDGPU::VGPR_16RegClass) {
270-
MRI->setRegAllocationHint(Dst, AMDGPURI::Size32, Src);
271-
MRI->setRegAllocationHint(Src, AMDGPURI::Size16, Dst);
272-
}
273-
if (MRI->getRegClass(Dst) == &AMDGPU::VGPR_16RegClass &&
274-
MRI->getRegClass(Src) == &AMDGPU::VGPR_32RegClass)
275-
MRI->setRegAllocationHint(Dst, AMDGPURI::Size16, Src);
276-
}
277-
}
278-
279239
return Changed;
280240
}

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 0 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -3327,73 +3327,6 @@ const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
33273327
return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
33283328
}
33293329

3330-
bool SIRegisterInfo::getRegAllocationHints(Register VirtReg,
3331-
ArrayRef<MCPhysReg> Order,
3332-
SmallVectorImpl<MCPhysReg> &Hints,
3333-
const MachineFunction &MF,
3334-
const VirtRegMap *VRM,
3335-
const LiveRegMatrix *Matrix) const {
3336-
3337-
const MachineRegisterInfo &MRI = MF.getRegInfo();
3338-
const SIRegisterInfo *TRI = ST.getRegisterInfo();
3339-
3340-
std::pair<unsigned, Register> Hint = MRI.getRegAllocationHint(VirtReg);
3341-
3342-
switch (Hint.first) {
3343-
case AMDGPURI::Size32: {
3344-
Register Paired = Hint.second;
3345-
assert(Paired);
3346-
Register PairedPhys;
3347-
if (Paired.isPhysical()) {
3348-
PairedPhys =
3349-
getMatchingSuperReg(Paired, AMDGPU::lo16, &AMDGPU::VGPR_32RegClass);
3350-
} else if (VRM && VRM->hasPhys(Paired)) {
3351-
PairedPhys = getMatchingSuperReg(VRM->getPhys(Paired), AMDGPU::lo16,
3352-
&AMDGPU::VGPR_32RegClass);
3353-
}
3354-
3355-
// Prefer the paired physreg.
3356-
if (PairedPhys)
3357-
// isLo(Paired) is implicitly true here from the API of
3358-
// getMatchingSuperReg.
3359-
Hints.push_back(PairedPhys);
3360-
return false;
3361-
}
3362-
case AMDGPURI::Size16: {
3363-
Register Paired = Hint.second;
3364-
assert(Paired);
3365-
Register PairedPhys;
3366-
if (Paired.isPhysical()) {
3367-
PairedPhys = TRI->getSubReg(Paired, AMDGPU::lo16);
3368-
} else if (VRM && VRM->hasPhys(Paired)) {
3369-
PairedPhys = TRI->getSubReg(VRM->getPhys(Paired), AMDGPU::lo16);
3370-
}
3371-
3372-
// First prefer the paired physreg.
3373-
if (PairedPhys)
3374-
Hints.push_back(PairedPhys);
3375-
else {
3376-
// Add all the lo16 physregs.
3377-
// When the Paired operand has not yet been assigned a physreg it is
3378-
// better to try putting VirtReg in a lo16 register, because possibly
3379-
// later Paired can be assigned to the overlapping register and the COPY
3380-
// can be eliminated.
3381-
for (MCPhysReg PhysReg : Order) {
3382-
if (PhysReg == PairedPhys || AMDGPU::isHi(PhysReg, *this))
3383-
continue;
3384-
if (AMDGPU::VGPR_16RegClass.contains(PhysReg) &&
3385-
!MRI.isReserved(PhysReg))
3386-
Hints.push_back(PhysReg);
3387-
}
3388-
}
3389-
return false;
3390-
}
3391-
default:
3392-
return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
3393-
VRM);
3394-
}
3395-
}
3396-
33973330
MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
33983331
// Not a callee saved register.
33993332
return AMDGPU::SGPR30_SGPR31;

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

Lines changed: 0 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -29,13 +29,6 @@ class LiveRegUnits;
2929
class RegisterBank;
3030
struct SGPRSpillBuilder;
3131

32-
/// Register allocation hint types. Helps eliminate unneeded COPY with True16
33-
namespace AMDGPURI {
34-
35-
enum { Size16 = 1, Size32 = 2 };
36-
37-
} // end namespace AMDGPURI
38-
3932
class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
4033
private:
4134
const GCNSubtarget &ST;
@@ -333,11 +326,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
333326
unsigned getRegPressureSetLimit(const MachineFunction &MF,
334327
unsigned Idx) const override;
335328

336-
bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,
337-
SmallVectorImpl<MCPhysReg> &Hints,
338-
const MachineFunction &MF, const VirtRegMap *VRM,
339-
const LiveRegMatrix *Matrix) const override;
340-
341329
const int *getRegUnitPressureSets(unsigned RegUnit) const override;
342330

343331
MCRegister getReturnAddressReg(const MachineFunction &MF) const;

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -152,6 +152,10 @@ bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
152152
if (AMDGPU::VGPR_32RegClass.contains(Reg) &&
153153
!AMDGPU::VGPR_32_Lo128RegClass.contains(Reg))
154154
return false;
155+
156+
if (AMDGPU::VGPR_16RegClass.contains(Reg) &&
157+
!AMDGPU::VGPR_16_Lo128RegClass.contains(Reg))
158+
return false;
155159
}
156160
}
157161
return true;

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -49,10 +49,11 @@ body: |
4949
; GFX11: liveins: $vgpr0
5050
; GFX11-NEXT: {{ $}}
5151
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
52-
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
52+
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
5353
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
54-
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
55-
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
54+
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
55+
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CEIL_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
56+
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
5657
;
5758
; GFX11-FAKE16-LABEL: name: fceil_s16_vv
5859
; GFX11-FAKE16: liveins: $vgpr0
@@ -89,8 +90,9 @@ body: |
8990
; GFX11-NEXT: {{ $}}
9091
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
9192
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
92-
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
93-
; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
93+
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
94+
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CEIL_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
95+
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
9496
;
9597
; GFX11-FAKE16-LABEL: name: fceil_s16_vs
9698
; GFX11-FAKE16: liveins: $sgpr0
@@ -126,10 +128,11 @@ body: |
126128
; GFX11: liveins: $vgpr0
127129
; GFX11-NEXT: {{ $}}
128130
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
129-
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
131+
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
130132
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
131-
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
132-
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
133+
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
134+
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CEIL_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
135+
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
133136
;
134137
; GFX11-FAKE16-LABEL: name: fceil_fneg_s16_vv
135138
; GFX11-FAKE16: liveins: $vgpr0

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -58,10 +58,11 @@ body: |
5858
; GFX11: liveins: $vgpr0
5959
; GFX11-NEXT: {{ $}}
6060
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
61-
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
61+
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
6262
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
63-
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
64-
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
63+
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
64+
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_FLOOR_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
65+
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
6566
;
6667
; GFX11-FAKE16-LABEL: name: ffloor_s16_vv
6768
; GFX11-FAKE16: liveins: $vgpr0
@@ -98,8 +99,9 @@ body: |
9899
; GFX11-NEXT: {{ $}}
99100
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
100101
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
101-
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
102-
; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
102+
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
103+
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_FLOOR_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
104+
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
103105
;
104106
; GFX11-FAKE16-LABEL: name: ffloor_s16_vs
105107
; GFX11-FAKE16: liveins: $sgpr0
@@ -135,10 +137,11 @@ body: |
135137
; GFX11: liveins: $vgpr0
136138
; GFX11-NEXT: {{ $}}
137139
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
138-
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
140+
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
139141
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
140-
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
141-
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
142+
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
143+
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_FLOOR_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
144+
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
142145
;
143146
; GFX11-FAKE16-LABEL: name: ffloor_fneg_s16_vv
144147
; GFX11-FAKE16: liveins: $vgpr0

0 commit comments

Comments
 (0)