Skip to content

Commit b83c960

Browse files
authored
[CodeGen][NewPM] Port SIWholeQuadMode to NPM. (llvm#125833)
1 parent e78be31 commit b83c960

File tree

9 files changed

+94
-30
lines changed

9 files changed

+94
-30
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ FunctionPass *createSIPeepholeSDWALegacyPass();
4141
FunctionPass *createSILowerI1CopiesLegacyPass();
4242
FunctionPass *createSIShrinkInstructionsLegacyPass();
4343
FunctionPass *createSILoadStoreOptimizerLegacyPass();
44-
FunctionPass *createSIWholeQuadModePass();
44+
FunctionPass *createSIWholeQuadModeLegacyPass();
4545
FunctionPass *createSIFixControlFlowLiveIntervalsPass();
4646
FunctionPass *createSIOptimizeExecMaskingPreRAPass();
4747
FunctionPass *createSIOptimizeVGPRLiveRangeLegacyPass();
@@ -204,7 +204,7 @@ extern char &SILowerSGPRSpillsLegacyID;
204204
void initializeSILoadStoreOptimizerLegacyPass(PassRegistry &);
205205
extern char &SILoadStoreOptimizerLegacyID;
206206

207-
void initializeSIWholeQuadModePass(PassRegistry &);
207+
void initializeSIWholeQuadModeLegacyPass(PassRegistry &);
208208
extern char &SIWholeQuadModeID;
209209

210210
void initializeSILowerControlFlowLegacyPass(PassRegistry &);

llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
111111
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
112112
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
113113
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
114+
MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
114115
#undef MACHINE_FUNCTION_PASS
115116

116117
#define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
@@ -140,7 +141,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-global-isel-divergence-lowering", AMDGPUGlob
140141
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbankselect", AMDGPURegBankSelectPass())
141142
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbanklegalize", AMDGPURegBankLegalizePass())
142143
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbank-combiner", AMDGPURegBankCombinerPass())
143-
DUMMY_MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
144144

145145
#undef DUMMY_MACHINE_FUNCTION_PASS
146146

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
#include "SIPeepholeSDWA.h"
5151
#include "SIPreAllocateWWMRegs.h"
5252
#include "SIShrinkInstructions.h"
53+
#include "SIWholeQuadMode.h"
5354
#include "TargetInfo/AMDGPUTargetInfo.h"
5455
#include "Utils/AMDGPUBaseInfo.h"
5556
#include "llvm/Analysis/CGSCCPassManager.h"
@@ -529,7 +530,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
529530
initializeSIInsertHardClausesPass(*PR);
530531
initializeSIInsertWaitcntsPass(*PR);
531532
initializeSIModeRegisterPass(*PR);
532-
initializeSIWholeQuadModePass(*PR);
533+
initializeSIWholeQuadModeLegacyPass(*PR);
533534
initializeSILowerControlFlowLegacyPass(*PR);
534535
initializeSIPreEmitPeepholePass(*PR);
535536
initializeSILateBranchLoweringPass(*PR);

llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp

Lines changed: 58 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
///
6868
//===----------------------------------------------------------------------===//
6969

70+
#include "SIWholeQuadMode.h"
7071
#include "AMDGPU.h"
7172
#include "GCNSubtarget.h"
7273
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -148,11 +149,19 @@ struct WorkItem {
148149
WorkItem(MachineInstr *MI) : MI(MI) {}
149150
};
150151

151-
class SIWholeQuadMode : public MachineFunctionPass {
152+
class SIWholeQuadMode {
153+
public:
154+
SIWholeQuadMode(MachineFunction &MF, LiveIntervals *LIS,
155+
MachineDominatorTree *MDT, MachinePostDominatorTree *PDT)
156+
: ST(&MF.getSubtarget<GCNSubtarget>()), TII(ST->getInstrInfo()),
157+
TRI(&TII->getRegisterInfo()), MRI(&MF.getRegInfo()), LIS(LIS), MDT(MDT),
158+
PDT(PDT) {}
159+
bool run(MachineFunction &MF);
160+
152161
private:
162+
const GCNSubtarget *ST;
153163
const SIInstrInfo *TII;
154164
const SIRegisterInfo *TRI;
155-
const GCNSubtarget *ST;
156165
MachineRegisterInfo *MRI;
157166
LiveIntervals *LIS;
158167
MachineDominatorTree *MDT;
@@ -225,12 +234,13 @@ class SIWholeQuadMode : public MachineFunctionPass {
225234
void lowerInitExec(MachineInstr &MI);
226235
MachineBasicBlock::iterator lowerInitExecInstrs(MachineBasicBlock &Entry,
227236
bool &Changed);
237+
};
228238

239+
class SIWholeQuadModeLegacy : public MachineFunctionPass {
229240
public:
230241
static char ID;
231242

232-
SIWholeQuadMode() :
233-
MachineFunctionPass(ID) { }
243+
SIWholeQuadModeLegacy() : MachineFunctionPass(ID) {}
234244

235245
bool runOnMachineFunction(MachineFunction &MF) override;
236246

@@ -250,23 +260,22 @@ class SIWholeQuadMode : public MachineFunctionPass {
250260
MachineFunctionProperties::Property::IsSSA);
251261
}
252262
};
253-
254263
} // end anonymous namespace
255264

256-
char SIWholeQuadMode::ID = 0;
265+
char SIWholeQuadModeLegacy::ID = 0;
257266

258-
INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
259-
false)
267+
INITIALIZE_PASS_BEGIN(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode",
268+
false, false)
260269
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
261270
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
262271
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
263-
INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
264-
false)
272+
INITIALIZE_PASS_END(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode",
273+
false, false)
265274

266-
char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
275+
char &llvm::SIWholeQuadModeID = SIWholeQuadModeLegacy::ID;
267276

268-
FunctionPass *llvm::createSIWholeQuadModePass() {
269-
return new SIWholeQuadMode;
277+
FunctionPass *llvm::createSIWholeQuadModeLegacyPass() {
278+
return new SIWholeQuadModeLegacy;
270279
}
271280

272281
#ifndef NDEBUG
@@ -1689,7 +1698,7 @@ SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry, bool &Changed) {
16891698
return InsertPt;
16901699
}
16911700

1692-
bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
1701+
bool SIWholeQuadMode::run(MachineFunction &MF) {
16931702
LLVM_DEBUG(dbgs() << "SI Whole Quad Mode on " << MF.getName()
16941703
<< " ------------- \n");
16951704
LLVM_DEBUG(MF.dump(););
@@ -1704,18 +1713,6 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
17041713
SetInactiveInstrs.clear();
17051714
StateTransition.clear();
17061715

1707-
ST = &MF.getSubtarget<GCNSubtarget>();
1708-
1709-
TII = ST->getInstrInfo();
1710-
TRI = &TII->getRegisterInfo();
1711-
MRI = &MF.getRegInfo();
1712-
LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
1713-
auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
1714-
MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
1715-
auto *PDTWrapper =
1716-
getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
1717-
PDT = PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
1718-
17191716
if (ST->isWave32()) {
17201717
AndOpc = AMDGPU::S_AND_B32;
17211718
AndTermOpc = AMDGPU::S_AND_B32_term;
@@ -1816,3 +1813,38 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
18161813

18171814
return Changed;
18181815
}
1816+
1817+
bool SIWholeQuadModeLegacy::runOnMachineFunction(MachineFunction &MF) {
1818+
LiveIntervals *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
1819+
auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
1820+
MachineDominatorTree *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
1821+
auto *PDTWrapper =
1822+
getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
1823+
MachinePostDominatorTree *PDT =
1824+
PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
1825+
SIWholeQuadMode Impl(MF, LIS, MDT, PDT);
1826+
return Impl.run(MF);
1827+
}
1828+
1829+
PreservedAnalyses
1830+
SIWholeQuadModePass::run(MachineFunction &MF,
1831+
MachineFunctionAnalysisManager &MFAM) {
1832+
MFPropsModifier _(*this, MF);
1833+
1834+
LiveIntervals *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF);
1835+
MachineDominatorTree *MDT =
1836+
MFAM.getCachedResult<MachineDominatorTreeAnalysis>(MF);
1837+
MachinePostDominatorTree *PDT =
1838+
MFAM.getCachedResult<MachinePostDominatorTreeAnalysis>(MF);
1839+
SIWholeQuadMode Impl(MF, LIS, MDT, PDT);
1840+
bool Changed = Impl.run(MF);
1841+
if (!Changed)
1842+
return PreservedAnalyses::all();
1843+
1844+
PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
1845+
PA.preserve<SlotIndexesAnalysis>();
1846+
PA.preserve<LiveIntervalsAnalysis>();
1847+
PA.preserve<MachineDominatorTreeAnalysis>();
1848+
PA.preserve<MachinePostDominatorTreeAnalysis>();
1849+
return PA;
1850+
}

llvm/lib/Target/AMDGPU/SIWholeQuadMode.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
//===- SIWholeQuadMode.h ----------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
10+
#define LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
11+
12+
#include "llvm/CodeGen/MachinePassManager.h"
13+
14+
namespace llvm {
15+
class SIWholeQuadModePass : public PassInfoMixin<SIWholeQuadModePass> {
16+
public:
17+
PreservedAnalyses run(MachineFunction &MF,
18+
MachineFunctionAnalysisManager &MFAM);
19+
20+
MachineFunctionProperties getClearedProperties() const {
21+
return MachineFunctionProperties().set(
22+
MachineFunctionProperties::Property::IsSSA);
23+
}
24+
};
25+
} // namespace llvm
26+
27+
#endif // LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H

llvm/test/CodeGen/AMDGPU/licm-wwm.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
22
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s
3+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s
34

45
# Machine LICM may hoist an instruction from a WWM region, which will force the SI-WQM pass
56
# to create a second WWM region. This is an unwanted hoisting.

llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
22
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=si-wqm -o - %s | FileCheck %s
34

45
---
56
# Test that we don't do silly things when there is no whole wave mode in the

llvm/test/CodeGen/AMDGPU/wqm-terminators.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
22
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-wqm -o - %s | FileCheck %s
34

45
--- |
56
define amdgpu_ps void @exit_to_exact() {

llvm/test/CodeGen/AMDGPU/wqm.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-wqm -o - %s | FileCheck %s
23

34
--- |
45
define amdgpu_ps void @test_strict_wwm_scc() {

0 commit comments

Comments
 (0)