|
14 | 14 | #include "GCNSubtarget.h"
|
15 | 15 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
16 | 16 | #include "SIMachineFunctionInfo.h"
|
| 17 | +#include "llvm/CodeGen/MachineFrameInfo.h" |
17 | 18 | #include "llvm/CodeGen/MachineFunction.h"
|
18 | 19 | #include "llvm/CodeGen/ScheduleDAG.h"
|
19 | 20 | #include "llvm/TargetParser/TargetParser.h"
|
@@ -1203,6 +1204,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
|
1203 | 1204 | fixWMMAHazards(MI);
|
1204 | 1205 | fixShift64HighRegBug(MI);
|
1205 | 1206 | fixVALUMaskWriteHazard(MI);
|
| 1207 | + fixRequiredExportPriority(MI); |
1206 | 1208 | }
|
1207 | 1209 |
|
1208 | 1210 | static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
|
@@ -3067,3 +3069,113 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
|
3067 | 3069 |
|
3068 | 3070 | return true;
|
3069 | 3071 | }
|
| 3072 | + |
| 3073 | +static bool ensureEntrySetPrio(MachineFunction *MF, int Priority, |
| 3074 | + const SIInstrInfo &TII) { |
| 3075 | + MachineBasicBlock &EntryMBB = MF->front(); |
| 3076 | + if (EntryMBB.begin() != EntryMBB.end()) { |
| 3077 | + auto &EntryMI = *EntryMBB.begin(); |
| 3078 | + if (EntryMI.getOpcode() == AMDGPU::S_SETPRIO && |
| 3079 | + EntryMI.getOperand(0).getImm() >= Priority) |
| 3080 | + return false; |
| 3081 | + } |
| 3082 | + |
| 3083 | + BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), TII.get(AMDGPU::S_SETPRIO)) |
| 3084 | + .addImm(Priority); |
| 3085 | + return true; |
| 3086 | +} |
| 3087 | + |
// Workaround for subtargets where exports must run at an elevated wave
// priority: raise priority at shader entry, and drop/restore priority
// around the final export in a sequence (presumably a hardware hazard on
// these parts — TODO confirm against the relevant errata/ISA notes).
// \returns true if any instruction was inserted or modified.
bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
  // Only applies on subtargets that report the restriction.
  if (!ST.hasRequiredExportPriority())
    return false;

  // Assume the following shader types will never have exports,
  // and avoid adding or adjusting S_SETPRIO.
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  auto CC = MF->getFunction().getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
  case CallingConv::AMDGPU_KERNEL:
    return false;
  default:
    break;
  }

  // Priority levels used by the workaround. S_SETPRIO takes a 2-bit
  // immediate, hence the clamp to MaxPriority below.
  const int MaxPriority = 3;
  const int NormalPriority = 2;
  const int PostExportPriority = 0;

  auto It = MI->getIterator();
  switch (MI->getOpcode()) {
  case AMDGPU::S_ENDPGM:
  case AMDGPU::S_ENDPGM_SAVED:
  case AMDGPU::S_ENDPGM_ORDERED_PS_DONE:
  case AMDGPU::SI_RETURN_TO_EPILOG:
    // Ensure shader with calls raises priority at entry.
    // This ensures correct priority if exports exist in callee.
    if (MF->getFrameInfo().hasCalls())
      return ensureEntrySetPrio(MF, NormalPriority, TII);
    return false;
  case AMDGPU::S_SETPRIO: {
    // Raise minimum priority unless in workaround.
    auto &PrioOp = MI->getOperand(0);
    int Prio = PrioOp.getImm();
    // An S_SETPRIO to PostExportPriority immediately after an export is
    // (part of) a previously applied workaround — leave it alone.
    bool InWA = (Prio == PostExportPriority) &&
                (It != MBB->begin() && TII.isEXP(*std::prev(It)));
    if (InWA || Prio >= NormalPriority)
      return false;
    // Bump a too-low user priority; clamp to the maximum encodable value.
    PrioOp.setImm(std::min(Prio + NormalPriority, MaxPriority));
    return true;
  }
  default:
    // Everything else is only interesting if it is an export.
    if (!TII.isEXP(*MI))
      return false;
    break;
  }

  // From here on MI is an export instruction.
  // Check entry priority at each export (as there will only be a few).
  // Note: amdgpu_gfx can only be a callee, so defer to caller setprio.
  bool Changed = false;
  if (CC != CallingConv::AMDGPU_Gfx)
    Changed = ensureEntrySetPrio(MF, NormalPriority, TII);

  auto NextMI = std::next(It);
  bool EndOfShader = false;
  if (NextMI != MBB->end()) {
    // Only need WA at end of sequence of exports.
    if (TII.isEXP(*NextMI))
      return Changed;
    // Assume appropriate S_SETPRIO after export means WA already applied.
    if (NextMI->getOpcode() == AMDGPU::S_SETPRIO &&
        NextMI->getOperand(0).getImm() == PostExportPriority)
      return Changed;
    EndOfShader = NextMI->getOpcode() == AMDGPU::S_ENDPGM;
  }

  const DebugLoc &DL = MI->getDebugLoc();

  // Lower priority.
  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
      .addImm(PostExportPriority);

  if (!EndOfShader) {
    // Wait for exports to complete.
    BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_WAITCNT_EXPCNT))
        .addReg(AMDGPU::SGPR_NULL)
        .addImm(0);
  }

  // Two NOPs follow the priority drop — presumably a required delay for
  // the priority change to take effect (TODO confirm hardware rationale).
  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);

  if (!EndOfShader) {
    // Return to normal (higher) priority.
    BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
        .addImm(NormalPriority);
  }

  return true;
}
0 commit comments