@@ -2920,7 +2920,9 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
2920
2920
return true ;
2921
2921
}
2922
2922
2923
- static unsigned baseSGPRNumber (Register Reg, const SIRegisterInfo &TRI) {
2923
+ // Return the numeric ID 0-63 of a 64b SGPR pair for a given SGPR.
2924
+ // e.g. SGPR0 = SGPR0_SGPR1 = 0, SGPR3 = SGPR2_SGPR3 = 1, etc.
2925
+ static unsigned sgprPairNumber (Register Reg, const SIRegisterInfo &TRI) {
2924
2926
unsigned RegN = TRI.getEncodingValue (Reg);
2925
2927
assert (RegN <= 127 );
2926
2928
return (RegN >> 1 ) & 0x3f ;
@@ -2966,13 +2968,14 @@ void GCNHazardRecognizer::computeVALUHazardSGPRs(MachineFunction *MMF) {
2966
2968
for (auto &MI : reverse (MBB->instrs ())) {
2967
2969
bool IsVALU = SIInstrInfo::isVALU (MI);
2968
2970
bool IsSALU = SIInstrInfo::isSALU (MI);
2969
- if (!( IsVALU || IsSALU) )
2971
+ if (!IsVALU && ! IsSALU)
2970
2972
continue ;
2971
2973
2972
2974
for (const MachineOperand &Op : MI.operands ()) {
2973
2975
if (!Op.isReg ())
2974
2976
continue ;
2975
2977
Register Reg = Op.getReg ();
2978
+ assert (!Op.getSubReg ());
2976
2979
// Only consider implicit operands of VCC.
2977
2980
if (Op.isImplicit () && !(Reg == AMDGPU::VCC_LO ||
2978
2981
Reg == AMDGPU::VCC_HI || Reg == AMDGPU::VCC))
@@ -2981,7 +2984,7 @@ void GCNHazardRecognizer::computeVALUHazardSGPRs(MachineFunction *MMF) {
2981
2984
continue ;
2982
2985
if (TRI.getEncodingValue (Reg) >= SGPR_NULL)
2983
2986
continue ;
2984
- unsigned RegN = baseSGPRNumber (Reg, TRI);
2987
+ unsigned RegN = sgprPairNumber (Reg, TRI);
2985
2988
if (IsVALU && Op.isUse ()) {
2986
2989
// Note: any access within a cycle must be considered a hazard.
2987
2990
if (InCycle || (ReadSGPRs[RegN] && SALUWriteSGPRs[RegN]))
@@ -3055,10 +3058,9 @@ bool GCNHazardRecognizer::fixVALUReadSGPRHazard(MachineInstr *MI) {
3055
3058
3056
3059
// All SGPR writes before a call/return must be flushed as the callee/caller
3057
3060
// will not see the hazard chain, i.e. (2) to (3) described above.
3058
- const bool IsSetPC = (MI->getOpcode () == AMDGPU::S_SETPC_B64 ||
3059
- MI->getOpcode () == AMDGPU::S_SETPC_B64_return ||
3060
- MI->getOpcode () == AMDGPU::S_SWAPPC_B64 ||
3061
- MI->getOpcode () == AMDGPU::S_CALL_B64);
3061
+ const bool IsSetPC = (MI->isCall () || MI->isReturn ()) &&
3062
+ !(MI->getOpcode () == AMDGPU::S_ENDPGM ||
3063
+ MI->getOpcode () == AMDGPU::S_ENDPGM_SAVED);
3062
3064
3063
3065
// Collect all SGPR sources for MI which are read by a VALU.
3064
3066
const unsigned SGPR_NULL = TRI.getEncodingValue (AMDGPU::SGPR_NULL_gfx11plus);
@@ -3081,7 +3083,7 @@ bool GCNHazardRecognizer::fixVALUReadSGPRHazard(MachineInstr *MI) {
3081
3083
if (TRI.getEncodingValue (OpReg) >= SGPR_NULL)
3082
3084
continue ;
3083
3085
3084
- unsigned RegN = baseSGPRNumber (OpReg, TRI);
3086
+ unsigned RegN = sgprPairNumber (OpReg, TRI);
3085
3087
if (!VALUReadHazardSGPRs[RegN])
3086
3088
continue ;
3087
3089
@@ -3102,7 +3104,7 @@ bool GCNHazardRecognizer::fixVALUReadSGPRHazard(MachineInstr *MI) {
3102
3104
if (IsSetPC && I.getNumDefs () > 0 )
3103
3105
return true ;
3104
3106
// Check for any register writes.
3105
- return llvm:: any_of (SGPRsUsed, [this , &I](Register Reg) {
3107
+ return any_of (SGPRsUsed, [this , &I](Register Reg) {
3106
3108
return I.modifiesRegister (Reg, &TRI);
3107
3109
});
3108
3110
};
@@ -3123,9 +3125,8 @@ bool GCNHazardRecognizer::fixVALUReadSGPRHazard(MachineInstr *MI) {
3123
3125
if (!SIInstrInfo::isSALU (I) || SIInstrInfo::isSOPP (I))
3124
3126
return 0 ;
3125
3127
// SALU must be unrelated to any hazard registers.
3126
- if (llvm::any_of (SGPRsUsed, [this , &I](Register Reg) {
3127
- return I.readsRegister (Reg, &TRI);
3128
- }))
3128
+ if (any_of (SGPRsUsed,
3129
+ [this , &I](Register Reg) { return I.readsRegister (Reg, &TRI); }))
3129
3130
return 0 ;
3130
3131
return 1 ;
3131
3132
};
@@ -3147,14 +3148,14 @@ bool GCNHazardRecognizer::fixVALUReadSGPRHazard(MachineInstr *MI) {
3147
3148
if (Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::VCC_HI)
3148
3149
return Register (AMDGPU::VCC);
3149
3150
// TODO: handle TTMP?
3150
- return Register (AMDGPU::SGPR0_SGPR1 + baseSGPRNumber (Reg, TRI));
3151
+ return Register (AMDGPU::SGPR0_SGPR1 + sgprPairNumber (Reg, TRI));
3151
3152
};
3152
3153
auto SearchHazardFn = [this , hazardPair,
3153
3154
&SGPRsUsed](const MachineInstr &I) {
3154
3155
if (!SIInstrInfo::isVALU (I))
3155
3156
return false ;
3156
3157
// Check for any register reads.
3157
- return llvm:: any_of (SGPRsUsed, [this , hazardPair, &I](Register Reg) {
3158
+ return any_of (SGPRsUsed, [this , hazardPair, &I](Register Reg) {
3158
3159
return I.readsRegister (hazardPair (Reg), &TRI);
3159
3160
});
3160
3161
};
0 commit comments