Skip to content

AMDGPU: Delete FillMFMAShadowMutation #123861

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1074,7 +1074,6 @@ class GCNPassConfig final : public AMDGPUPassConfig {
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
if (ST.shouldClusterStores())
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
DAG->addMutation(
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less))
Expand Down
116 changes: 0 additions & 116 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,6 @@ using namespace llvm;
#include "AMDGPUGenSubtargetInfo.inc"
#undef AMDGPUSubtarget

// Hidden-by-default-off command line switch "amdgpu-enable-power-sched".
// GCNSubtarget::createFillMFMAShadowMutation() reads it to decide whether to
// hand out a FillMFMAShadowMutation or a null pointer.
static cl::opt<bool>
EnablePowerSched("amdgpu-enable-power-sched",
cl::desc("Enable scheduling to minimize mAI power bursts"),
cl::init(false));

static cl::opt<bool> EnableVGPRIndexMode(
"amdgpu-vgpr-index-mode",
cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
Expand Down Expand Up @@ -586,117 +581,6 @@ void GCNSubtarget::adjustSchedDependency(
}
}

namespace {
/// Scheduling DAG mutation that adds artificial edges from MFMA instructions
/// to independent SALU instructions so the scheduler gets a chance to place
/// scalar work (rather than VALU work) in the latency shadow of the MFMA.
struct FillMFMAShadowMutation : ScheduleDAGMutation {
  /// Instruction info used to classify instructions (SALU / VALU / MAI).
  const SIInstrInfo *TII;

  /// DAG currently being mutated. Assigned at the start of apply(); null
  /// until then so that accidental early use is detectable.
  ScheduleDAGMI *DAG = nullptr;

  explicit FillMFMAShadowMutation(const SIInstrInfo *tii) : TII(tii) {}

  /// \returns true if \p SU wraps a machine instruction that TII classifies
  /// as SALU and that is not a terminator.
  bool isSALU(const SUnit *SU) const {
    const MachineInstr *MI = SU->getInstr();
    return MI && TII->isSALU(*MI) && !MI->isTerminator();
  }

  /// \returns true if \p SU wraps a machine instruction that TII classifies
  /// as VALU.
  bool isVALU(const SUnit *SU) const {
    const MachineInstr *MI = SU->getInstr();
    return MI && TII->isVALU(*MI);
  }

  /// Link as many SALU instructions in chain as possible. Return the size
  /// of the chain. Links up to MaxChain instructions.
  ///
  /// Starting at \p To, walks SALU successors depth-first (LIFO worklist).
  /// Each newly visited node is made artificially dependent on \p From, and
  /// every VALU successor of \p From is made artificially dependent on that
  /// node, so the SALU node is ordered between \p From and those VALUs.
  ///
  /// \param From     The MFMA node whose shadow is being filled.
  /// \param To       First SALU candidate of the chain.
  /// \param MaxChain Upper bound on worklist pops (and therefore on the
  ///                 number of links created).
  /// \param Visited  Nodes already processed by any previous call; updated
  ///                 in place.
  /// \returns the number of From -> SALU artificial edges actually added.
  unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain,
                         SmallPtrSetImpl<SUnit *> &Visited) const {
    SmallVector<SUnit *, 8> Worklist({To});
    unsigned Linked = 0;

    // Note: a pop of an already visited node still consumes one unit of the
    // MaxChain budget because the decrement happens in the loop condition.
    while (!Worklist.empty() && MaxChain-- > 0) {
      SUnit *SU = Worklist.pop_back_val();
      if (!Visited.insert(SU).second)
        continue;

      LLVM_DEBUG(dbgs() << "Inserting edge from\n"; DAG->dumpNode(*From);
                 dbgs() << "to\n"; DAG->dumpNode(*SU); dbgs() << '\n');

      // Make SU depend on From, unless that would be a self edge, target the
      // exit node, or be rejected by canAddEdge().
      if (SU != From && From != &DAG->ExitSU && DAG->canAddEdge(SU, From))
        if (DAG->addEdge(SU, SDep(From, SDep::Artificial)))
          ++Linked;

      // Make every VALU successor of From depend on SU as well, so that SU
      // is ordered ahead of those VALU instructions.
      for (SDep &SI : From->Succs) {
        SUnit *SUv = SI.getSUnit();
        if (SUv != From && SU != &DAG->ExitSU && isVALU(SUv) &&
            DAG->canAddEdge(SUv, SU))
          DAG->addEdge(SUv, SDep(SU, SDep::Artificial));
      }

      // Continue the chain through SALU successors of SU.
      for (SDep &SI : SU->Succs) {
        SUnit *Succ = SI.getSUnit();
        if (Succ != SU && isSALU(Succ))
          Worklist.push_back(Succ);
      }
    }

    return Linked;
  }

  /// Entry point invoked by the scheduler. Does nothing when the subtarget
  /// has no MAI instructions, when no scheduling model is available, or when
  /// the DAG has no scheduling units.
  void apply(ScheduleDAGInstrs *DAGInstrs) override {
    const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
    if (!ST.hasMAIInsts())
      return;
    DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
    const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
    if (!TSchedModel || DAG->SUnits.empty())
      return;

    // Scan for MFMA long latency instructions and try to add a dependency
    // of available SALU instructions to give them a chance to fill MFMA
    // shadow. That is desirable to fill MFMA shadow with SALU instructions
    // rather than VALU to prevent power consumption bursts and throttle.
    //
    // LastSALU is shared across all MFMAs: each MFMA resumes the scan where
    // the previous one stopped, so a candidate is considered at most once.
    auto LastSALU = DAG->SUnits.begin();
    auto E = DAG->SUnits.end();
    SmallPtrSet<SUnit *, 32> Visited;
    for (SUnit &SU : DAG->SUnits) {
      MachineInstr &MAI = *SU.getInstr();
      // AGPR read/write moves are MAI-classified but are skipped here.
      if (!TII->isMAI(MAI) ||
          MAI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          MAI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64)
        continue;

      // Guard the "- 1" below: with an unsigned latency of 0 it would wrap
      // to UINT_MAX and request an effectively unbounded chain.
      const unsigned Latency = TSchedModel->computeInstrLatency(&MAI);
      if (Latency == 0)
        continue;
      unsigned Lat = Latency - 1;

      LLVM_DEBUG(dbgs() << "Found MFMA: "; DAG->dumpNode(SU);
                 dbgs() << "Need " << Lat
                        << " instructions to cover latency.\n");

      // Find up to Lat independent scalar instructions as early as
      // possible such that they can be scheduled after this MFMA.
      for (; Lat && LastSALU != E; ++LastSALU) {
        if (Visited.count(&*LastSALU))
          continue;

        if (&SU == &DAG->ExitSU || &SU == &*LastSALU || !isSALU(&*LastSALU) ||
            !DAG->canAddEdge(&*LastSALU, &SU))
          continue;

        // linkSALUChain never links more than Lat nodes, so this cannot
        // underflow.
        Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited);
      }
    }
  }
};
} // namespace

/// Appends this subtarget's post-RA DAG mutations to \p Mutations. A
/// FillMFMAShadowMutation bound to this subtarget's InstrInfo is always
/// added; the mutation itself bails out when it has nothing to do.
void GCNSubtarget::getPostRAMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  std::unique_ptr<ScheduleDAGMutation> FillShadow =
      std::make_unique<FillMFMAShadowMutation>(&InstrInfo);
  Mutations.push_back(std::move(FillShadow));
}

std::unique_ptr<ScheduleDAGMutation>
GCNSubtarget::createFillMFMAShadowMutation(const TargetInstrInfo *TII) const {
return EnablePowerSched ? std::make_unique<FillMFMAShadowMutation>(&InstrInfo)
: nullptr;
}

unsigned GCNSubtarget::getNSAThreshold(const MachineFunction &MF) const {
if (getGeneration() >= AMDGPUSubtarget::GFX12)
return 0; // Not MIMG encoding.
Expand Down
7 changes: 0 additions & 7 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -1575,13 +1575,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// unit requirement.
unsigned getMaxNumVGPRs(const MachineFunction &MF) const;

/// Override hook that appends this subtarget's post-RA scheduling DAG
/// mutations to \p Mutations.
void getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const override;

/// Creates the DAG mutation that tries to fill the MFMA latency shadow.
/// May return a null pointer (the definition gates creation on a command
/// line option), so callers must tolerate a null result.
std::unique_ptr<ScheduleDAGMutation>
createFillMFMAShadowMutation(const TargetInstrInfo *TII) const;

/// \returns true when getWavefrontSize() reports exactly 32.
bool isWave32() const {
  const bool Is32Wide = (getWavefrontSize() == 32);
  return Is32Wide;
}
Expand Down
26 changes: 0 additions & 26 deletions llvm/test/CodeGen/AMDGPU/power-sched-no-cycle.mir

This file was deleted.

Loading