@@ -3012,7 +3012,9 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
3012
3012
return true ;
3013
3013
}
3014
3014
3015
- static unsigned baseSGPRNumber (Register Reg, const SIRegisterInfo &TRI) {
3015
+ // Return the numeric ID 0-63 of a 64b SGPR pair for a given SGPR.
3016
+ // e.g. SGPR0 = SGPR0_SGPR1 = 0, SGPR3 = SGPR2_SGPR3 = 1, etc.
3017
+ static unsigned sgprPairNumber (Register Reg, const SIRegisterInfo &TRI) {
3016
3018
unsigned RegN = TRI.getEncodingValue (Reg);
3017
3019
assert (RegN <= 127 );
3018
3020
return (RegN >> 1 ) & 0x3f ;
@@ -3058,13 +3060,14 @@ void GCNHazardRecognizer::computeVALUHazardSGPRs(MachineFunction *MMF) {
3058
3060
for (auto &MI : reverse (MBB->instrs ())) {
3059
3061
bool IsVALU = SIInstrInfo::isVALU (MI);
3060
3062
bool IsSALU = SIInstrInfo::isSALU (MI);
3061
- if (!( IsVALU || IsSALU) )
3063
+ if (!IsVALU && ! IsSALU)
3062
3064
continue ;
3063
3065
3064
3066
for (const MachineOperand &Op : MI.operands ()) {
3065
3067
if (!Op.isReg ())
3066
3068
continue ;
3067
3069
Register Reg = Op.getReg ();
3070
+ assert (!Op.getSubReg ());
3068
3071
// Only consider implicit operands of VCC.
3069
3072
if (Op.isImplicit () && !(Reg == AMDGPU::VCC_LO ||
3070
3073
Reg == AMDGPU::VCC_HI || Reg == AMDGPU::VCC))
@@ -3073,7 +3076,7 @@ void GCNHazardRecognizer::computeVALUHazardSGPRs(MachineFunction *MMF) {
3073
3076
continue ;
3074
3077
if (TRI.getEncodingValue (Reg) >= SGPR_NULL)
3075
3078
continue ;
3076
- unsigned RegN = baseSGPRNumber (Reg, TRI);
3079
+ unsigned RegN = sgprPairNumber (Reg, TRI);
3077
3080
if (IsVALU && Op.isUse ()) {
3078
3081
// Note: any access within a cycle must be considered a hazard.
3079
3082
if (InCycle || (ReadSGPRs[RegN] && SALUWriteSGPRs[RegN]))
@@ -3147,10 +3150,9 @@ bool GCNHazardRecognizer::fixVALUReadSGPRHazard(MachineInstr *MI) {
3147
3150
3148
3151
// All SGPR writes before a call/return must be flushed as the callee/caller
3149
3152
// will not see the hazard chain, i.e. (2) to (3) described above.
3150
- const bool IsSetPC = (MI->getOpcode () == AMDGPU::S_SETPC_B64 ||
3151
- MI->getOpcode () == AMDGPU::S_SETPC_B64_return ||
3152
- MI->getOpcode () == AMDGPU::S_SWAPPC_B64 ||
3153
- MI->getOpcode () == AMDGPU::S_CALL_B64);
3153
+ const bool IsSetPC = (MI->isCall () || MI->isReturn ()) &&
3154
+ !(MI->getOpcode () == AMDGPU::S_ENDPGM ||
3155
+ MI->getOpcode () == AMDGPU::S_ENDPGM_SAVED);
3154
3156
3155
3157
// Collect all SGPR sources for MI which are read by a VALU.
3156
3158
const unsigned SGPR_NULL = TRI.getEncodingValue (AMDGPU::SGPR_NULL_gfx11plus);
@@ -3173,7 +3175,7 @@ bool GCNHazardRecognizer::fixVALUReadSGPRHazard(MachineInstr *MI) {
3173
3175
if (TRI.getEncodingValue (OpReg) >= SGPR_NULL)
3174
3176
continue ;
3175
3177
3176
- unsigned RegN = baseSGPRNumber (OpReg, TRI);
3178
+ unsigned RegN = sgprPairNumber (OpReg, TRI);
3177
3179
if (!VALUReadHazardSGPRs[RegN])
3178
3180
continue ;
3179
3181
@@ -3194,7 +3196,7 @@ bool GCNHazardRecognizer::fixVALUReadSGPRHazard(MachineInstr *MI) {
3194
3196
if (IsSetPC && I.getNumDefs () > 0 )
3195
3197
return true ;
3196
3198
// Check for any register writes.
3197
- return llvm:: any_of (SGPRsUsed, [this , &I](Register Reg) {
3199
+ return any_of (SGPRsUsed, [this , &I](Register Reg) {
3198
3200
return I.modifiesRegister (Reg, &TRI);
3199
3201
});
3200
3202
};
@@ -3215,9 +3217,8 @@ bool GCNHazardRecognizer::fixVALUReadSGPRHazard(MachineInstr *MI) {
3215
3217
if (!SIInstrInfo::isSALU (I) || SIInstrInfo::isSOPP (I))
3216
3218
return 0 ;
3217
3219
// SALU must be unrelated to any hazard registers.
3218
- if (llvm::any_of (SGPRsUsed, [this , &I](Register Reg) {
3219
- return I.readsRegister (Reg, &TRI);
3220
- }))
3220
+ if (any_of (SGPRsUsed,
3221
+ [this , &I](Register Reg) { return I.readsRegister (Reg, &TRI); }))
3221
3222
return 0 ;
3222
3223
return 1 ;
3223
3224
};
@@ -3239,14 +3240,14 @@ bool GCNHazardRecognizer::fixVALUReadSGPRHazard(MachineInstr *MI) {
3239
3240
if (Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::VCC_HI)
3240
3241
return Register (AMDGPU::VCC);
3241
3242
// TODO: handle TTMP?
3242
- return Register (AMDGPU::SGPR0_SGPR1 + baseSGPRNumber (Reg, TRI));
3243
+ return Register (AMDGPU::SGPR0_SGPR1 + sgprPairNumber (Reg, TRI));
3243
3244
};
3244
3245
auto SearchHazardFn = [this , hazardPair,
3245
3246
&SGPRsUsed](const MachineInstr &I) {
3246
3247
if (!SIInstrInfo::isVALU (I))
3247
3248
return false ;
3248
3249
// Check for any register reads.
3249
- return llvm:: any_of (SGPRsUsed, [this , hazardPair, &I](Register Reg) {
3250
+ return any_of (SGPRsUsed, [this , hazardPair, &I](Register Reg) {
3250
3251
return I.readsRegister (hazardPair (Reg), &TRI);
3251
3252
});
3252
3253
};
0 commit comments