-
Notifications
You must be signed in to change notification settings - Fork 14.3k
RegAllocGreedy: Add dummy priority advisor for writing MIR tests #121207
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
I regularly struggle reproducing failures in greedy due to changes in priority when resuming the allocation from MIR vs. a complete compilation starting at IR. That is, the fix in e0919b1 did not really fix the problem of the instruction distance mattering. Add a way to bypass all of the priority heuristics for MIR tests, by prioritizing only by virtual register number. Could also give this a more specific name, like PrioritizeLowVirtRegNumber
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-regalloc Author: Matt Arsenault (arsenm) ChangesI regularly struggle reproducing failures in greedy due to changes Add a way to bypass all of the priority heuristics for MIR tests, Full diff: https://github.com/llvm/llvm-project/pull/121207.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 4fa2bc76b38b4b..95a7801c372f7a 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -376,6 +376,12 @@ unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const {
return Prio;
}
+unsigned DummyPriorityAdvisor::getPriority(const LiveInterval &LI) const {
+ // Prioritize by virtual register number, lowest first.
+ Register Reg = LI.reg();
+ return ~Reg.virtRegIndex();
+}
+
const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
diff --git a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
index 0650aaff56ea00..4525b8fc5a383f 100644
--- a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
+++ b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
@@ -30,7 +30,10 @@ static cl::opt<RegAllocPriorityAdvisorAnalysis::AdvisorMode> Mode(
clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release,
"release", "precompiled"),
clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development,
- "development", "for training")));
+ "development", "for training"),
+ clEnumValN(
+ RegAllocPriorityAdvisorAnalysis::AdvisorMode::Dummy, "dummy",
+ "prioritize low virtual register numbers for test and debug")));
char RegAllocPriorityAdvisorAnalysis::ID = 0;
INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysis, "regalloc-priority",
@@ -67,6 +70,31 @@ class DefaultPriorityAdvisorAnalysis final
}
const bool NotAsRequested;
};
+
+class DummyPriorityAdvisorAnalysis final
+ : public RegAllocPriorityAdvisorAnalysis {
+public:
+ DummyPriorityAdvisorAnalysis()
+ : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Dummy) {}
+
+ // support for isa<> and dyn_cast.
+ static bool classof(const RegAllocPriorityAdvisorAnalysis *R) {
+ return R->getAdvisorMode() == AdvisorMode::Dummy;
+ }
+
+private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<SlotIndexesWrapperPass>();
+ RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU);
+ }
+
+ std::unique_ptr<RegAllocPriorityAdvisor>
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ return std::make_unique<DummyPriorityAdvisor>(
+ MF, RA, &getAnalysis<SlotIndexesWrapperPass>().getSI());
+ }
+};
+
} // namespace
template <> Pass *llvm::callDefaultCtor<RegAllocPriorityAdvisorAnalysis>() {
@@ -75,6 +103,9 @@ template <> Pass *llvm::callDefaultCtor<RegAllocPriorityAdvisorAnalysis>() {
case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default:
Ret = new DefaultPriorityAdvisorAnalysis(/*NotAsRequested*/ false);
break;
+ case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Dummy:
+ Ret = new DummyPriorityAdvisorAnalysis();
+ break;
case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development:
#if defined(LLVM_HAVE_TFLITE)
Ret = createDevelopmentModePriorityAdvisor();
@@ -97,6 +128,8 @@ StringRef RegAllocPriorityAdvisorAnalysis::getPassName() const {
return "Release mode Regalloc Priority Advisor";
case AdvisorMode::Development:
return "Development mode Regalloc Priority Advisor";
+ case AdvisorMode::Dummy:
+ return "Dummy Regalloc Priority Advisor";
}
llvm_unreachable("Unknown advisor kind");
}
diff --git a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h
index 1e9fa967214cc8..32e4598b715392 100644
--- a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h
+++ b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h
@@ -56,9 +56,21 @@ class DefaultPriorityAdvisor : public RegAllocPriorityAdvisor {
unsigned getPriority(const LiveInterval &LI) const override;
};
+/// Stupid priority advisor which just enqueues in virtual register number
+/// order, for debug purposes only.
+class DummyPriorityAdvisor : public RegAllocPriorityAdvisor {
+public:
+ DummyPriorityAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ SlotIndexes *const Indexes)
+ : RegAllocPriorityAdvisor(MF, RA, Indexes) {}
+
+private:
+ unsigned getPriority(const LiveInterval &LI) const override;
+};
+
class RegAllocPriorityAdvisorAnalysis : public ImmutablePass {
public:
- enum class AdvisorMode : int { Default, Release, Development };
+ enum class AdvisorMode : int { Default, Release, Development, Dummy };
RegAllocPriorityAdvisorAnalysis(AdvisorMode Mode)
: ImmutablePass(ID), Mode(Mode){};
diff --git a/llvm/test/CodeGen/AMDGPU/dummy-regalloc-priority-advisor.mir b/llvm/test/CodeGen/AMDGPU/dummy-regalloc-priority-advisor.mir
new file mode 100644
index 00000000000000..5c7c07632f0d59
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dummy-regalloc-priority-advisor.mir
@@ -0,0 +1,54 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=greedy,2 -stress-regalloc=4 -stop-after=virtregrewriter,2 -regalloc-enable-priority-advisor=default -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=greedy,2 -stress-regalloc=4 -stop-after=virtregrewriter,2 -regalloc-enable-priority-advisor=dummy -o - %s | FileCheck -check-prefixes=CHECK,DUMMY %s
+
+# Check that the regalloc-enable-priority-advisor=dummy option works
+# and the result is different from the default. Ordinarily %1 would be
+# prioritized higher than %0 due to the register class priority
+
+---
+name: foo
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vreg_128 }
+ - { id: 2, class: vgpr_32 }
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; DEFAULT-LABEL: name: foo
+ ; DEFAULT: liveins: $vgpr0, $vgpr1
+ ; DEFAULT-NEXT: {{ $}}
+ ; DEFAULT-NEXT: SI_SPILL_V128_SAVE $vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
+ ; DEFAULT-NEXT: SI_SPILL_V32_SAVE $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; DEFAULT-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
+ ; DEFAULT-NEXT: renamable $vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ ; DEFAULT-NEXT: renamable $vgpr3 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; DEFAULT-NEXT: renamable $vgpr3 = V_ADD_U32_e32 killed $vgpr2, killed $vgpr3, implicit $exec
+ ; DEFAULT-NEXT: SI_RETURN implicit $vgpr3, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; DUMMY-LABEL: name: foo
+ ; DUMMY: liveins: $vgpr0, $vgpr1
+ ; DUMMY-NEXT: {{ $}}
+ ; DUMMY-NEXT: SI_SPILL_V128_SAVE $vgpr1_vgpr2_vgpr3_vgpr4, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5)
+ ; DUMMY-NEXT: SI_SPILL_V32_SAVE $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ ; DUMMY-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
+ ; DUMMY-NEXT: renamable $vgpr2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; DUMMY-NEXT: renamable $vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5)
+ ; DUMMY-NEXT: renamable $vgpr3 = V_ADD_U32_e32 killed $vgpr3, killed $vgpr2, implicit $exec
+ ; DUMMY-NEXT: SI_RETURN implicit $vgpr3, implicit $vgpr0, implicit $vgpr1
+ undef %1.sub0:vreg_128 = COPY $vgpr1
+ %0:vgpr_32 = COPY $vgpr0
+ S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
+ %2:vgpr_32 = V_ADD_U32_e32 %1.sub0, %0, implicit $exec
+ $vgpr3 = COPY %2
+ SI_RETURN implicit $vgpr3, implicit $vgpr0, implicit $vgpr1
+
+...
+
+# CHECK: {{.*}}
|
Local branch amd-gfx 1763907 Merged main:9cd774d1e49f into amd-gfx:557a5107ab62 Remote branch main 11e482c RegAllocGreedy: Add dummy priority advisor for writing MIR tests (llvm#121207)
I regularly struggle reproducing failures in greedy due to changes
in priority when resuming the allocation from MIR vs. a complete
compilation starting at IR. That is, the fix in
e0919b1 did not really fix the
problem of the instruction distance mattering.
Add a way to bypass all of the priority heuristics for MIR tests,
by prioritizing only by virtual register number. Could also
give this a more specific name, like PrioritizeLowVirtRegNumber