Skip to content

Commit 7ee0cca

Browse files
Rebase: fix failing test
1 parent 58035b5 commit 7ee0cca

File tree

5 files changed

+417
-0
lines changed

5 files changed

+417
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,17 @@ extern char &GCNRewritePartialRegUsesID;
466466
void initializeAMDGPUWaitSGPRHazardsLegacyPass(PassRegistry &);
467467
extern char &AMDGPUWaitSGPRHazardsLegacyID;
468468

469+
void initializeAMDGPUUniformIntrinsicCombinePass(PassRegistry &);
470+
extern char &AMDGPUUniformIntrinsicCombineID;
471+
FunctionPass *createAMDGPUUniformIntrinsicCombinePass();
472+
473+
/// New pass manager wrapper for the AMDGPU uniform intrinsic combine.
/// Keeps a reference to the target machine it was constructed with; run() is
/// only declared here and is defined in the pass's implementation file.
struct AMDGPUUniformIntrinsicCombinePass
474+
: public PassInfoMixin<AMDGPUUniformIntrinsicCombinePass> {
475+
const AMDGPUTargetMachine &TM;
476+
AMDGPUUniformIntrinsicCombinePass(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
477+
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
478+
};
479+
469480
namespace AMDGPU {
470481
enum TargetIndex {
471482
TI_CONSTDATA_START,

llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ FUNCTION_PASS("amdgpu-unify-divergent-exit-nodes",
6767
AMDGPUUnifyDivergentExitNodesPass())
6868
FUNCTION_PASS("amdgpu-usenative", AMDGPUUseNativeCallsPass())
6969
FUNCTION_PASS("si-annotate-control-flow", SIAnnotateControlFlowPass(*static_cast<const GCNTargetMachine *>(this)))
70+
FUNCTION_PASS("amdgpu-uniform-intrinsic-combine", AMDGPUUniformIntrinsicCombinePass(*this))
7071
#undef FUNCTION_PASS
7172

7273
#ifndef FUNCTION_ANALYSIS
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
//===-- AMDGPUUniformIntrinsicCombine.cpp
2+
//-----------------------------------------===//
3+
//
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
/// \file
11+
/// This pass combines uniform intrinsic instructions.
12+
/// Uniform intrinsic combine uses pattern matching to identify and optimize
13+
/// redundant intrinsic instructions.
14+
//===----------------------------------------------------------------------===//
15+
16+
#include "AMDGPU.h"
17+
#include "GCNSubtarget.h"
18+
#include "llvm/Analysis/DomTreeUpdater.h"
19+
#include "llvm/Analysis/UniformityAnalysis.h"
20+
#include "llvm/CodeGen/TargetPassConfig.h"
21+
#include "llvm/IR/IRBuilder.h"
22+
#include "llvm/IR/InstVisitor.h"
23+
#include "llvm/IR/IntrinsicsAMDGPU.h"
24+
#include "llvm/IR/PatternMatch.h"
25+
#include "llvm/InitializePasses.h"
26+
#include "llvm/Target/TargetMachine.h"
27+
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
28+
29+
#define DEBUG_TYPE "amdgpu-uniform-intrinsic-combine"
30+
31+
using namespace llvm;
32+
using namespace llvm::AMDGPU;
33+
using namespace llvm::PatternMatch;
34+
35+
namespace {
36+
37+
class AMDGPUUniformIntrinsicCombine : public FunctionPass {
38+
public:
39+
static char ID;
40+
AMDGPUUniformIntrinsicCombine() : FunctionPass(ID) {}
41+
42+
bool runOnFunction(Function &F) override;
43+
44+
void getAnalysisUsage(AnalysisUsage &AU) const override {
45+
AU.addPreserved<DominatorTreeWrapperPass>();
46+
AU.addRequired<UniformityInfoWrapperPass>();
47+
AU.addRequired<TargetPassConfig>();
48+
}
49+
};
50+
51+
class AMDGPUUniformIntrinsicCombineImpl
52+
: public InstVisitor<AMDGPUUniformIntrinsicCombineImpl> {
53+
private:
54+
const UniformityInfo *UI;
55+
56+
void optimizeUniformIntrinsicInst(IntrinsicInst &II) const;
57+
58+
public:
59+
AMDGPUUniformIntrinsicCombineImpl() = delete;
60+
61+
AMDGPUUniformIntrinsicCombineImpl(const UniformityInfo *UI) : UI(UI) {}
62+
63+
bool run(Function &F);
64+
};
65+
66+
} // namespace
67+
68+
char AMDGPUUniformIntrinsicCombine::ID = 0;
69+
70+
char &llvm::AMDGPUUniformIntrinsicCombineID = AMDGPUUniformIntrinsicCombine::ID;
71+
72+
/// Legacy pass manager entry point: fetches the uniformity analysis for \p F
/// and hands the function to the shared implementation.
/// \returns false when the function is skipped, otherwise whatever the
/// implementation's run() reports.
bool AMDGPUUniformIntrinsicCombine::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  const UniformityInfo &Uniformity =
      getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();

  AMDGPUUniformIntrinsicCombineImpl Combiner(&Uniformity);
  return Combiner.run(F);
}
82+
83+
/// New pass manager entry point: runs the shared implementation on \p F using
/// the uniformity analysis obtained from \p AM.
/// \returns PreservedAnalyses::all() when the implementation reports no
/// change; otherwise a set that preserves only the dominator tree analysis.
PreservedAnalyses
AMDGPUUniformIntrinsicCombinePass::run(Function &F,
                                       FunctionAnalysisManager &AM) {
  const UniformityInfo &Uniformity = AM.getResult<UniformityInfoAnalysis>(F);

  // Nothing was rewritten, so every cached analysis is still valid.
  if (!AMDGPUUniformIntrinsicCombineImpl(&Uniformity).run(F))
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved;
  Preserved.preserve<DominatorTreeAnalysis>();
  return Preserved;
}
101+
102+
/// Walks every instruction of \p F and offers each intrinsic call to
/// optimizeUniformIntrinsicInst().
///
/// \returns true if at least one intrinsic call had its uses rewritten, so
/// that callers can report the IR modification to the pass manager.
bool AMDGPUUniformIntrinsicCombineImpl::run(Function &F) {
  bool IsChanged = false;

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      auto *II = dyn_cast<IntrinsicInst>(&I);
      if (!II)
        continue;

      // optimizeUniformIntrinsicInst() has no return value. Its only effect is
      // replaceAllUsesWith() on the call, so a fold that actually modified the
      // IR is observable as the call going from "has uses" to "has no uses".
      const bool HadUses = !II->use_empty();
      optimizeUniformIntrinsicInst(*II);
      if (HadUses && II->use_empty())
        IsChanged = true;
    }
  }

  return IsChanged;
}
122+
123+
/// Folds \p II into its first operand when the call is redundant.
///
/// Handles amdgcn_permlane64, amdgcn_readfirstlane and amdgcn_readlane; any
/// other intrinsic is left untouched. A fold only redirects the users of
/// \p II to the operand; the call itself is not erased.
void AMDGPUUniformIntrinsicCombineImpl::optimizeUniformIntrinsicInst(
    IntrinsicInst &II) const {
  const Intrinsic::ID IID = II.getIntrinsicID();
  const bool IsPermLane64 = IID == Intrinsic::amdgcn_permlane64;
  const bool IsReadFirstLane = IID == Intrinsic::amdgcn_readfirstlane;
  const bool IsReadLane = IID == Intrinsic::amdgcn_readlane;

  if (!IsPermLane64 && !IsReadFirstLane && !IsReadLane)
    return;

  // All three intrinsics are replaced by their source when it is uniform.
  Value *Src = II.getArgOperand(0);
  if (UI->isUniform(Src)) {
    II.replaceAllUsesWith(Src);
    return;
  }

  // permlane64 has no further folds.
  if (IsPermLane64)
    return;

  // The remaining folds may not be safe if exec may differ between the def
  // and the use, so they are limited to sources that are not instructions of
  // a different basic block.
  const auto *SrcInst = dyn_cast<Instruction>(Src);
  if (SrcInst && SrcInst->getParent() != II.getParent())
    return;

  // readfirstlane (readfirstlane x) -> readfirstlane x
  // readlane (readfirstlane x), y -> readfirstlane x
  bool IsRedundant = match(Src, m_Intrinsic<Intrinsic::amdgcn_readfirstlane>());

  if (!IsRedundant) {
    if (IsReadFirstLane) {
      // readfirstlane (readlane x, y) -> readlane x, y
      IsRedundant = match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>());
    } else {
      // readlane (readlane x, y), y -> readlane x, y
      IsRedundant =
          match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>(
                         m_Value(), m_Specific(II.getArgOperand(1))));
    }
  }

  if (IsRedundant)
    II.replaceAllUsesWith(Src);
}
173+
174+
// Legacy pass manager registration for AMDGPUUniformIntrinsicCombine under
// the DEBUG_TYPE name, listing UniformityInfoWrapperPass and TargetPassConfig
// as dependencies.
INITIALIZE_PASS_BEGIN(AMDGPUUniformIntrinsicCombine, DEBUG_TYPE,
175+
"AMDGPU uniformIntrinsic Combine", false, false)
176+
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
177+
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
178+
INITIALIZE_PASS_END(AMDGPUUniformIntrinsicCombine, DEBUG_TYPE,
179+
"AMDGPU uniformIntrinsic Combine", false, false)
180+
181+
/// Factory for the legacy pass: returns a newly allocated
/// AMDGPUUniformIntrinsicCombine instance.
FunctionPass *llvm::createAMDGPUUniformIntrinsicCombinePass() {
182+
return new AMDGPUUniformIntrinsicCombine();
183+
}

llvm/lib/Target/AMDGPU/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ add_llvm_target(AMDGPUCodeGen
6161
AMDGPUHSAMetadataStreamer.cpp
6262
AMDGPUInsertDelayAlu.cpp
6363
AMDGPUInstCombineIntrinsic.cpp
64+
AMDGPUUniformIntrinsicCombine.cpp
6465
AMDGPUInstrInfo.cpp
6566
AMDGPUInstructionSelector.cpp
6667
AMDGPUISelDAGToDAG.cpp

0 commit comments

Comments
 (0)