|
14 | 14 | #include "AMDGPU.h"
|
15 | 15 | #include "GCNSubtarget.h"
|
16 | 16 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
| 17 | +#include "SIDefines.h" |
17 | 18 | #include "SIInstrInfo.h"
|
| 19 | +#include "Utils/AMDGPUBaseInfo.h" |
18 | 20 | #include "llvm/ADT/SetVector.h"
|
| 21 | +#include "llvm/CodeGen/MachineInstr.h" |
19 | 22 | #include "llvm/MC/MCRegister.h"
|
| 23 | +#include "llvm/Support/ErrorHandling.h" |
20 | 24 |
|
21 | 25 | using namespace llvm;
|
22 | 26 |
|
@@ -57,6 +61,19 @@ class AMDGPUInsertDelayAlu : public MachineFunctionPass {
|
57 | 61 | return false;
|
58 | 62 | }
|
59 | 63 |
|
| 64 | + static bool instructionWaitsForSALUWrites(const MachineInstr &MI) { |
| 65 | + // These instruction types wait for VA_SDST==0 before issuing. |
| 66 | + // S_CBRANCH_EXECZ and S_CBRANCH_VCCZ are covered by SALU flag |
| 67 | + const uint64_t VA_SDST_0 = SIInstrFlags::SALU | SIInstrFlags::EXP | |
| 68 | + SIInstrFlags::DS | SIInstrFlags::SMRD | |
| 69 | + SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | |
| 70 | + SIInstrFlags::VSAMPLE; |
| 71 | + |
| 72 | + if (MI.getDesc().TSFlags & VA_SDST_0) |
| 73 | + return true; |
| 74 | + return false; |
| 75 | + } |
| 76 | + |
// The kinds of delay that an s_delay_alu instruction is able to encode.
// Enumerator order is preserved, so the implicit values (0..3) are unchanged.
enum DelayType {
  VALU,
  TRANS,
  SALU,
  OTHER
};
|
62 | 79 |
|
@@ -365,7 +382,7 @@ class AMDGPUInsertDelayAlu : public MachineFunctionPass {
|
365 | 382 |
|
366 | 383 | DelayType Type = getDelayType(MI.getDesc().TSFlags);
|
367 | 384 |
|
368 |
| - if (SII->isSALU(MI.getOpcode())) { |
| 385 | + if (instructionWaitsForSALUWrites(MI)) { |
369 | 386 | auto It = State.find(lastSGPRfromVALU);
|
370 | 387 | if (It != State.end()) {
|
371 | 388 | DelayInfo Info = It->getSecond();
|
|
0 commit comments