@@ -130,10 +130,10 @@ enum WaitEventType {
130
130
// We reserve a fixed number of VGPR slots in the scoring tables for
131
131
// special tokens like SCMEM_LDS (needed for buffer load to LDS).
132
132
enum RegisterMapping {
133
- SQ_MAX_PGM_VGPRS = 512 , // Maximum programmable VGPRs across all targets.
134
- AGPR_OFFSET = 256 , // Maximum programmable ArchVGPRs across all targets.
135
- SQ_MAX_PGM_SGPRS = 128 , // Maximum programmable SGPRs across all targets.
136
- NUM_EXTRA_VGPRS = 9 , // Reserved slots for DS.
133
+ SQ_MAX_PGM_VGPRS = 1024 , // Maximum programmable VGPRs across all targets.
134
+ AGPR_OFFSET = 512 , // Maximum programmable ArchVGPRs across all targets.
135
+ SQ_MAX_PGM_SGPRS = 256 , // Maximum programmable SGPRs across all targets.
136
+ NUM_EXTRA_VGPRS = 9 , // Reserved slots for DS.
137
137
// Artificial register slots to track LDS writes into specific LDS locations
138
138
// if a location is known. When slots are exhausted or location is
139
139
// unknown use the first slot. The first slot is also always updated in
@@ -748,26 +748,34 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
748
748
749
749
RegInterval Result;
750
750
751
- unsigned Reg = TRI->getEncodingValue (AMDGPU::getMCReg (Op.getReg (), *ST)) &
752
- AMDGPU::HWEncoding::REG_IDX_MASK;
751
+ MCRegister MCReg = AMDGPU::getMCReg (Op.getReg (), *ST);
752
+ unsigned RegIdx = TRI->getHWRegIndex (MCReg);
753
+ assert (isUInt<8 >(RegIdx));
754
+ unsigned Reg = (RegIdx << 1 ) | (AMDGPU::isHi16Reg (MCReg, *TRI) ? 1 : 0 );
753
755
756
+ const TargetRegisterClass *RC = TRI->getPhysRegBaseClass (Op.getReg ());
757
+ unsigned Size = TRI->getRegSizeInBits (*RC);
758
+
759
+ // AGPRs/VGPRs are tracked in 16-bit units, SGPRs in 32-bit units.
754
760
if (TRI->isVectorRegister (*MRI, Op.getReg ())) {
755
761
assert (Reg <= SQ_MAX_PGM_VGPRS);
756
762
Result.first = Reg;
757
763
if (TRI->isAGPR (*MRI, Op.getReg ()))
758
764
Result.first += AGPR_OFFSET;
759
765
assert (Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS);
760
- } else if (TRI->isSGPRReg (*MRI, Op.getReg ()) && Reg < SQ_MAX_PGM_SGPRS) {
761
- // SGPRs including VCC, TTMPs and EXEC but excluding read-only scalar
762
- // sources like SRC_PRIVATE_BASE.
763
- Result.first = Reg + NUM_ALL_VGPRS;
764
- } else {
766
+ assert (Size % 16 == 0 );
767
+ Result.second = Result.first + (Size / 16 );
768
+ } else if (TRI->isSGPRReg (*MRI, Op.getReg ())) {
769
+ assert (Reg < SQ_MAX_PGM_SGPRS * 2 );
770
+ Result.first = (Reg >> 1 ) + NUM_ALL_VGPRS;
771
+ assert (Result.first >= NUM_ALL_VGPRS &&
772
+ Result.first < SQ_MAX_PGM_SGPRS + NUM_ALL_VGPRS);
773
+ Result.second = Result.first + divideCeil (Size, 32 );
774
+ }
775
+ // TODO: Handle TTMP
776
+ // else if (TRI->isTTMP(*MRI, Op.getReg())) ...
777
+ else
765
778
return {-1 , -1 };
766
- }
767
-
768
- const TargetRegisterClass *RC = TRI->getPhysRegBaseClass (Op.getReg ());
769
- unsigned Size = TRI->getRegSizeInBits (*RC);
770
- Result.second = Result.first + ((Size + 16 ) / 32 );
771
779
772
780
return Result;
773
781
}
0 commit comments