@@ -130,10 +130,10 @@ enum WaitEventType {
130
130
// We reserve a fixed number of VGPR slots in the scoring tables for
131
131
// special tokens like SCMEM_LDS (needed for buffer load to LDS).
132
132
enum RegisterMapping {
133
- SQ_MAX_PGM_VGPRS = 512 , // Maximum programmable VGPRs across all targets.
134
- AGPR_OFFSET = 256 , // Maximum programmable ArchVGPRs across all targets.
135
- SQ_MAX_PGM_SGPRS = 128 , // Maximum programmable SGPRs across all targets.
136
- NUM_EXTRA_VGPRS = 9 , // Reserved slots for DS.
133
+ SQ_MAX_PGM_VGPRS = 1024 , // Maximum programmable VGPRs across all targets.
134
+ AGPR_OFFSET = 512 , // Maximum programmable ArchVGPRs across all targets.
135
+ SQ_MAX_PGM_SGPRS = 128 , // Maximum programmable SGPRs across all targets.
136
+ NUM_EXTRA_VGPRS = 9 , // Reserved slots for DS.
137
137
// Artificial register slots to track LDS writes into specific LDS locations
138
138
// if a location is known. When slots are exhausted or location is
139
139
// unknown use the first slot. The first slot is also always updated in
@@ -748,27 +748,32 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
748
748
749
749
RegInterval Result;
750
750
751
- unsigned Reg = TRI->getEncodingValue (AMDGPU::getMCReg (Op.getReg (), *ST)) &
752
- AMDGPU::HWEncoding::REG_IDX_MASK;
751
+ MCRegister MCReg = AMDGPU::getMCReg (Op.getReg (), *ST);
752
+ unsigned RegIdx = TRI->getHWRegIndex (MCReg);
753
+ assert (isUInt<8 >(RegIdx));
753
754
755
+ const TargetRegisterClass *RC = TRI->getPhysRegBaseClass (Op.getReg ());
756
+ unsigned Size = TRI->getRegSizeInBits (*RC);
757
+
758
+ // AGPRs/VGPRs are tracked in 16-bit slots, SGPRs in 32-bit slots.
754
759
if (TRI->isVectorRegister (*MRI, Op.getReg ())) {
755
- assert (Reg <= SQ_MAX_PGM_VGPRS);
760
+ unsigned Reg = RegIdx << 1 | (AMDGPU::isHi16Reg (MCReg, *TRI) ? 1 : 0 );
761
+ assert (Reg < AGPR_OFFSET);
756
762
Result.first = Reg;
757
763
if (TRI->isAGPR (*MRI, Op.getReg ()))
758
764
Result.first += AGPR_OFFSET;
759
765
assert (Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS);
760
- } else if (TRI->isSGPRReg (*MRI, Op.getReg ()) && Reg < SQ_MAX_PGM_SGPRS) {
766
+ assert (Size % 16 == 0 );
767
+ Result.second = Result.first + (Size / 16 );
768
+ } else if (TRI->isSGPRReg (*MRI, Op.getReg ()) && RegIdx < SQ_MAX_PGM_SGPRS) {
761
769
// SGPRs including VCC, TTMPs and EXEC but excluding read-only scalar
762
770
// sources like SRC_PRIVATE_BASE.
763
- Result.first = Reg + NUM_ALL_VGPRS;
771
+ Result.first = RegIdx + NUM_ALL_VGPRS;
772
+ Result.second = Result.first + divideCeil (Size, 32 );
764
773
} else {
765
774
return {-1 , -1 };
766
775
}
767
776
768
- const TargetRegisterClass *RC = TRI->getPhysRegBaseClass (Op.getReg ());
769
- unsigned Size = TRI->getRegSizeInBits (*RC);
770
- Result.second = Result.first + ((Size + 16 ) / 32 );
771
-
772
777
return Result;
773
778
}
774
779
0 commit comments