llvm · arsenm · Jan 22, 2025 · Jan 21, 2025 · Jan 22, 2025 · Jan 22, 2025
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1074,7 +1074,6 @@ class GCNPassConfig final : public AMDGPUPassConfig {
     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
     if (ST.shouldClusterStores())
       DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
-    DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
     DAG->addMutation(
         createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
     if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less))

diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -38,11 +38,6 @@ using namespace llvm;
 #include "AMDGPUGenSubtargetInfo.inc"
 #undef AMDGPUSubtarget
 
-static cl::opt<bool>
-    EnablePowerSched("amdgpu-enable-power-sched",
-                     cl::desc("Enable scheduling to minimize mAI power bursts"),
-                     cl::init(false));
-
 static cl::opt<bool> EnableVGPRIndexMode(
     "amdgpu-vgpr-index-mode",
     cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
@@ -586,117 +581,6 @@ void GCNSubtarget::adjustSchedDependency(
   }
 }
 
-namespace {
-struct FillMFMAShadowMutation : ScheduleDAGMutation {
-  const SIInstrInfo *TII;
-
-  ScheduleDAGMI *DAG;
-
-  FillMFMAShadowMutation(const SIInstrInfo *tii) : TII(tii) {}
-
-  bool isSALU(const SUnit *SU) const {
-    const MachineInstr *MI = SU->getInstr();
-    return MI && TII->isSALU(*MI) && !MI->isTerminator();
-  }
-
-  bool isVALU(const SUnit *SU) const {
-    const MachineInstr *MI = SU->getInstr();
-    return MI && TII->isVALU(*MI);
-  }
-
-  // Link as many SALU instructions in chain as possible. Return the size
-  // of the chain. Links up to MaxChain instructions.
-  unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain,
-                         SmallPtrSetImpl<SUnit *> &Visited) const {
-    SmallVector<SUnit *, 8> Worklist({To});
-    unsigned Linked = 0;
-
-    while (!Worklist.empty() && MaxChain-- > 0) {
-      SUnit *SU = Worklist.pop_back_val();
-      if (!Visited.insert(SU).second)
-        continue;
-
-      LLVM_DEBUG(dbgs() << "Inserting edge from\n"; DAG->dumpNode(*From);
-                 dbgs() << "to\n"; DAG->dumpNode(*SU); dbgs() << '\n');
-
-      if (SU != From && From != &DAG->ExitSU && DAG->canAddEdge(SU, From))
-        if (DAG->addEdge(SU, SDep(From, SDep::Artificial)))
-          ++Linked;
-
-      for (SDep &SI : From->Succs) {
-        SUnit *SUv = SI.getSUnit();
-        if (SUv != From && SU != &DAG->ExitSU && isVALU(SUv) &&
-            DAG->canAddEdge(SUv, SU))
-          DAG->addEdge(SUv, SDep(SU, SDep::Artificial));
-      }
-
-      for (SDep &SI : SU->Succs) {
-        SUnit *Succ = SI.getSUnit();
-        if (Succ != SU && isSALU(Succ))
-          Worklist.push_back(Succ);
-      }
-    }
-
-    return Linked;
-  }
-
-  void apply(ScheduleDAGInstrs *DAGInstrs) override {
-    const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
-    if (!ST.hasMAIInsts())
-      return;
-    DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
-    const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
-    if (!TSchedModel || DAG->SUnits.empty())
-      return;
-
-    // Scan for MFMA long latency instructions and try to add a dependency
-    // of available SALU instructions to give them a chance to fill MFMA
-    // shadow. That is desirable to fill MFMA shadow with SALU instructions
-    // rather than VALU to prevent power consumption bursts and throttle.
-    auto LastSALU = DAG->SUnits.begin();
-    auto E = DAG->SUnits.end();
-    SmallPtrSet<SUnit *, 32> Visited;
-    for (SUnit &SU : DAG->SUnits) {
-      MachineInstr &MAI = *SU.getInstr();
-      if (!TII->isMAI(MAI) ||
-          MAI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
-          MAI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64)
-        continue;
-
-      unsigned Lat = TSchedModel->computeInstrLatency(&MAI) - 1;
-
-      LLVM_DEBUG(dbgs() << "Found MFMA: "; DAG->dumpNode(SU);
-                 dbgs() << "Need " << Lat
-                        << " instructions to cover latency.\n");
-
-      // Find up to Lat independent scalar instructions as early as
-      // possible such that they can be scheduled after this MFMA.
-      for (; Lat && LastSALU != E; ++LastSALU) {
-        if (Visited.count(&*LastSALU))
-          continue;
-
-        if (&SU == &DAG->ExitSU || &SU == &*LastSALU || !isSALU(&*LastSALU) ||
-            !DAG->canAddEdge(&*LastSALU, &SU))
-          continue;
-
-        Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited);
-      }
-    }
-  }
-};
-} // namespace
-
-void GCNSubtarget::getPostRAMutations(
-    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
-  Mutations.push_back(std::make_unique<FillMFMAShadowMutation>(&InstrInfo));
-}
-
-std::unique_ptr<ScheduleDAGMutation>
-GCNSubtarget::createFillMFMAShadowMutation(const TargetInstrInfo *TII) const {
-  return EnablePowerSched ? std::make_unique<FillMFMAShadowMutation>(&InstrInfo)
-                          : nullptr;
-}
-
 unsigned GCNSubtarget::getNSAThreshold(const MachineFunction &MF) const {
   if (getGeneration() >= AMDGPUSubtarget::GFX12)
     return 0; // Not MIMG encoding.

diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1575,13 +1575,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// unit requirement.
   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
 
-  void getPostRAMutations(
-      std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
-      const override;
-
-  std::unique_ptr<ScheduleDAGMutation>
-  createFillMFMAShadowMutation(const TargetInstrInfo *TII) const;
-
   bool isWave32() const {
     return getWavefrontSize() == 32;
   }

diff --git a/llvm/test/CodeGen/AMDGPU/power-sched-no-cycle.mir b/llvm/test/CodeGen/AMDGPU/power-sched-no-cycle.mir