Skip to content

Commit 1ae1ff6

Browse files
committed
insertwaitcnt pass update for true16
1 parent 70828d9 commit 1ae1ff6

File tree

4 files changed

+50
-20
lines changed

4 files changed

+50
-20
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,10 @@ enum WaitEventType {
137137
// We reserve a fixed number of VGPR slots in the scoring tables for
138138
// special tokens like SCMEM_LDS (needed for buffer load to LDS).
139139
enum RegisterMapping {
140-
SQ_MAX_PGM_VGPRS = 512, // Maximum programmable VGPRs across all targets.
141-
AGPR_OFFSET = 256, // Maximum programmable ArchVGPRs across all targets.
142-
SQ_MAX_PGM_SGPRS = 256, // Maximum programmable SGPRs across all targets.
143-
NUM_EXTRA_VGPRS = 9, // Reserved slots for DS.
140+
SQ_MAX_PGM_VGPRS = 1024, // Maximum programmable VGPRs across all targets.
141+
AGPR_OFFSET = 512, // Maximum programmable ArchVGPRs across all targets.
142+
SQ_MAX_PGM_SGPRS = 256, // Maximum programmable SGPRs across all targets.
143+
NUM_EXTRA_VGPRS = 9, // Reserved slots for DS.
144144
// Artificial register slots to track LDS writes into specific LDS locations
145145
// if a location is known. When slots are exhausted or location is
146146
// unknown use the first slot. The first slot is also always updated in
@@ -165,6 +165,17 @@ enum VmemType {
165165
NUM_VMEM_TYPES
166166
};
167167

168+
/// Maps \p Reg to an integer "point" used as an endpoint of register
/// intervals. The returned value is built from three fields:
///   - bit 0:   1 when AMDGPU::isHi16Reg reports the register as a high
///              16-bit half, 0 otherwise;
///   - bits 1+: the hardware register index from TRI.getHWRegIndex, shifted
///              left by one;
///   - 0x200:   OR'ed in when TRI.isVectorRegister reports a vector register.
/// \param Reg register to map; it is first translated with AMDGPU::getMCReg
///            using the subtarget obtained from \p TRI.
/// \param TRI register info used for the index, hi-half and vector queries.
/// \returns the packed point value described above.
static unsigned getRegPoint(MCRegister Reg, const SIRegisterInfo &TRI) {
169+
// Order register interval points so that intervals of 32-bit VGPRs
170+
// include intervals of their 16-bit halves.
171+
MCRegister MCReg = AMDGPU::getMCReg(Reg, TRI.getSubtarget());
172+
unsigned RegIdx = TRI.getHWRegIndex(MCReg);
173+
bool IsHi = AMDGPU::isHi16Reg(MCReg, TRI);
174+
bool IsVector = TRI.isVectorRegister(MCReg);
175+
// With an 8-bit index, (RegIdx << 1) | IsHi is at most 0x1FF, so it cannot
// collide with the 0x200 vector flag OR'ed in below.
assert(isUInt<8>(RegIdx));
176+
return (IsVector ? 0x200 : 0) | (RegIdx << 1) | (IsHi ? 1 : 0);
177+
}
178+
168179
// Maps values of InstCounterType to the instruction that waits on that
169180
// counter. Only used if GCNSubtarget::hasExtendedWaitCounts()
170181
// returns true.
@@ -757,30 +768,31 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
757768

758769
RegInterval Result;
759770

760-
unsigned Reg = TRI->getEncodingValue(AMDGPU::getMCReg(Op.getReg(), *ST)) &
761-
AMDGPU::HWEncoding::REG_IDX_MASK;
771+
unsigned Reg = getRegPoint(Op.getReg(), *TRI);
772+
const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Op.getReg());
773+
unsigned Size = TRI->getRegSizeInBits(*RC);
762774

775+
// VGPRs are tracked every 16 bits, SGPRs by 32 bits
763776
if (TRI->isVectorRegister(*MRI, Op.getReg())) {
764777
assert(Reg >= Encoding.VGPR0 && Reg <= Encoding.VGPRL);
765778
Result.first = Reg - Encoding.VGPR0;
766779
if (TRI->isAGPR(*MRI, Op.getReg()))
767780
Result.first += AGPR_OFFSET;
768781
assert(Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS);
782+
assert(Size % 16 == 0);
783+
Result.second = Result.first + (Size / 16);
769784
} else if (TRI->isSGPRReg(*MRI, Op.getReg())) {
770-
assert(Reg >= Encoding.SGPR0 && Reg < SQ_MAX_PGM_SGPRS);
771-
Result.first = Reg - Encoding.SGPR0 + NUM_ALL_VGPRS;
785+
assert(Reg >= Encoding.SGPR0 && Reg < SQ_MAX_PGM_SGPRS * 2);
786+
Result.first = ((Reg - Encoding.SGPR0) >> 1) + NUM_ALL_VGPRS;
772787
assert(Result.first >= NUM_ALL_VGPRS &&
773788
Result.first < SQ_MAX_PGM_SGPRS + NUM_ALL_VGPRS);
789+
Result.second = Result.first + divideCeil(Size, 32);
774790
}
775791
// TODO: Handle TTMP
776792
// else if (TRI->isTTMP(*MRI, Reg.getReg())) ...
777793
else
778794
return {-1, -1};
779795

780-
const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Op.getReg());
781-
unsigned Size = TRI->getRegSizeInBits(*RC);
782-
Result.second = Result.first + ((Size + 16) / 32);
783-
784796
return Result;
785797
}
786798

@@ -2452,16 +2464,14 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
24522464

24532465
unsigned NumVGPRsMax = ST->getAddressableNumVGPRs();
24542466
unsigned NumSGPRsMax = ST->getAddressableNumSGPRs();
2455-
assert(NumVGPRsMax <= SQ_MAX_PGM_VGPRS);
2467+
assert(NumVGPRsMax + AGPR_OFFSET <= SQ_MAX_PGM_VGPRS);
24562468
assert(NumSGPRsMax <= SQ_MAX_PGM_SGPRS);
24572469

24582470
RegisterEncoding Encoding = {};
2459-
Encoding.VGPR0 =
2460-
TRI->getEncodingValue(AMDGPU::VGPR0) & AMDGPU::HWEncoding::REG_IDX_MASK;
2461-
Encoding.VGPRL = Encoding.VGPR0 + NumVGPRsMax - 1;
2462-
Encoding.SGPR0 =
2463-
TRI->getEncodingValue(AMDGPU::SGPR0) & AMDGPU::HWEncoding::REG_IDX_MASK;
2464-
Encoding.SGPRL = Encoding.SGPR0 + NumSGPRsMax - 1;
2471+
Encoding.VGPR0 = getRegPoint(AMDGPU::VGPR0, *TRI);
2472+
Encoding.VGPRL = Encoding.VGPR0 + NumVGPRsMax * 2 - 1;
2473+
Encoding.SGPR0 = getRegPoint(AMDGPU::SGPR0, *TRI);
2474+
Encoding.SGPRL = Encoding.SGPR0 + NumSGPRsMax * 2 - 1;
24652475

24662476
BlockInfos.clear();
24672477
bool Modified = false;

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3611,13 +3611,26 @@ SIRegisterInfo::getRegClassForOperandReg(const MachineRegisterInfo &MRI,
36113611
return getSubRegisterClass(SrcRC, MO.getSubReg());
36123612
}
36133613

3614+
/// \returns true if \p Reg has a base register class (as returned by
/// getPhysRegBaseClass) and that class satisfies isVGPRClass. This overload
/// does not take a MachineRegisterInfo.
bool SIRegisterInfo::isVGPR(MCRegister Reg) const {
3615+
const TargetRegisterClass *RC = getPhysRegBaseClass(Reg);
3616+
// Registers without classes are unaddressable, SGPR-like registers.
3617+
// A null RC therefore yields false rather than being dereferenced.
return RC && isVGPRClass(RC);
3618+
}
3619+
36143620
/// \returns true if the register class found for \p Reg by getRegClassForReg
/// (queried with \p MRI) is non-null and satisfies isVGPRClass.
bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
36153621
Register Reg) const {
36163622
const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
36173623
// Registers without classes are unaddressable, SGPR-like registers.
36183624
// A null RC therefore yields false rather than being dereferenced.
return RC && isVGPRClass(RC);
36193625
}
36203626

3627+
/// \returns true if \p Reg has a base register class (as returned by
/// getPhysRegBaseClass) and that class satisfies isAGPRClass. This overload
/// does not take a MachineRegisterInfo.
bool SIRegisterInfo::isAGPR(MCRegister Reg) const {
3628+
const TargetRegisterClass *RC = getPhysRegBaseClass(Reg);
3629+
3630+
// Registers without classes are unaddressable, SGPR-like registers.
3631+
// A null RC therefore yields false rather than being dereferenced.
return RC && isAGPRClass(RC);
3632+
}
3633+
36213634
bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
36223635
Register Reg) const {
36233636
const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
6464
/// \returns true when the VGPR member is non-zero.
bool hasReg() { return VGPR != 0; }
6565
};
6666

67+
/// \returns a const reference to the ST member (the GCNSubtarget held by this
/// register info object).
const GCNSubtarget &getSubtarget() const { return ST; }
68+
6769
/// \returns the sub reg enum value for the given \p Channel
6870
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
6971
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
@@ -295,8 +297,13 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
295297
getRegClassForOperandReg(const MachineRegisterInfo &MRI,
296298
const MachineOperand &MO) const;
297299

300+
bool isVGPR(MCRegister Reg) const;
298301
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const;
302+
bool isAGPR(MCRegister Reg) const;
299303
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const;
304+
/// \returns true if \p Reg is classified as a VGPR or an AGPR by the
/// MCRegister overloads of isVGPR / isAGPR (no MachineRegisterInfo needed).
bool isVectorRegister(MCRegister Reg) const {
305+
return isVGPR(Reg) || isAGPR(Reg);
306+
}
300307
/// \returns true if \p Reg is classified as a VGPR or an AGPR by the
/// MachineRegisterInfo-based overloads of isVGPR / isAGPR.
bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const {
301308
return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
302309
}

llvm/test/CodeGen/AMDGPU/spillv16.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ define void @spill_i16_alu_two_vals() {
6161
; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:4 glc dlc
6262
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
6363
; GCN-TRUE16-NEXT: scratch_load_d16_hi_b16 v0, off, s32 offset:6 ; 2-byte Folded Reload
64-
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
6564
; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
65+
; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0)
6666
; GCN-TRUE16-NEXT: scratch_store_d16_hi_b16 off, v0, s32 dlc
6767
; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
6868
; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:4 dlc

0 commit comments

Comments
 (0)