-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Introduce "amdgpu-uniform-intrinsic-combine" pass to combine uniform AMDGPU lane Intrinsics. #116953
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
be7ad78
8dae2e7
d8d3666
4311e65
1bed57f
ed204b9
847fef4
e0fc6fc
661ce96
8963961
5fb5f8c
982096c
d4b7ec0
6297b9d
603d5f6
2781457
261b4ff
c34d392
eb73c6a
1bca2e7
01e3ed6
51cb723
c9ace74
a1a0706
316472c
f22d719
f8da0bc
5e3c8fa
8a8f4f4
f5b900d
0dcbc1c
bb3a69e
61d1024
c058c3c
d2b8976
5b4cf1c
c96c9e8
5b5b32a
684d561
45c7468
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
//===-- AMDGPUUniformIntrinsicCombine.cpp ---------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
/// \file | ||
/// This pass simplifies certain intrinsic calls when the arguments are uniform. | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "AMDGPU.h" | ||
#include "GCNSubtarget.h" | ||
#include "llvm/Analysis/DomTreeUpdater.h" | ||
#include "llvm/Analysis/LoopInfo.h" | ||
#include "llvm/Analysis/ScalarEvolution.h" | ||
#include "llvm/Analysis/TargetLibraryInfo.h" | ||
#include "llvm/Analysis/UniformityAnalysis.h" | ||
#include "llvm/CodeGen/TargetPassConfig.h" | ||
#include "llvm/IR/IRBuilder.h" | ||
#include "llvm/IR/InstIterator.h" | ||
#include "llvm/IR/InstVisitor.h" | ||
#include "llvm/IR/IntrinsicsAMDGPU.h" | ||
#include "llvm/IR/PatternMatch.h" | ||
#include "llvm/InitializePasses.h" | ||
#include "llvm/Target/TargetMachine.h" | ||
#include "llvm/Transforms/Utils/BasicBlockUtils.h" | ||
|
||
#define DEBUG_TYPE "amdgpu-uniform-intrinsic-combine" | ||
|
||
using namespace llvm; | ||
using namespace llvm::AMDGPU; | ||
using namespace llvm::PatternMatch; | ||
|
||
/// Forwards the first argument of a lane-read style intrinsic
/// (permlane64 / readfirstlane / readlane) to every user of the call and then
/// deletes the call. The caller must already have established that the use of
/// that argument is not divergent.
///
/// NOTE(review): reviewers of the originating patch questioned whether this
/// replacement stays valid if later code motion makes the forwarded operand
/// divergent at some use; limiting the rewrite to uses dominated by the call
/// was floated as a possible mitigation. Confirm before relying on this.
///
/// \returns true, since the IR is always modified.
static bool forwardUniformLaneSource(IntrinsicInst &II) {
  Value *Forwarded = II.getArgOperand(0);
  LLVM_DEBUG(dbgs() << "Replacing " << II << " with " << *Forwarded << '\n');
  II.replaceAllUsesWith(Forwarded);
  II.eraseFromParent();
  return true;
}

/// Rewrites compares of a ballot against zero in terms of the ballot's own
/// argument. The caller must already have established that the use of that
/// argument is not divergent.
///
///   (icmp eq %ballot, 0)  -->  not %ballot_arg
///   (icmp ne %ballot, 0)  -->  %ballot_arg
///
/// The ballot call itself is deleted once it has no users left.
///
/// \returns true if at least one compare was rewritten.
static bool foldUniformBallotCompares(IntrinsicInst &II) {
  Value *BallotArg = II.getArgOperand(0);
  LLVM_DEBUG(dbgs() << "Found uniform ballot intrinsic: " << II << '\n');

  // Bail out before touching anything when no user is an integer compare.
  if (none_of(II.users(), [](User *Usr) { return isa<ICmpInst>(Usr); }))
    return false;

  bool MadeChange = false;
  // Compares are erased while walking the user list, so advance the iterator
  // before each visit.
  for (User *Usr : make_early_inc_range(II.users())) {
    auto *Cmp = dyn_cast<ICmpInst>(Usr);
    if (!Cmp)
      continue;

    // Pick the operand that is not the ballot; only compares against zero are
    // handled.
    Value *Other =
        Cmp->getOperand(0) == &II ? Cmp->getOperand(1) : Cmp->getOperand(0);
    if (!match(Other, m_Zero()))
      continue;

    Value *Replacement = nullptr;
    switch (Cmp->getPredicate()) {
    case ICmpInst::ICMP_EQ: {
      // The negation is materialized right before the compare it replaces.
      Instruction *NotOp =
          BinaryOperator::CreateNot(BallotArg, "", Cmp->getIterator());
      LLVM_DEBUG(dbgs() << "Replacing ICMP_EQ: " << *NotOp << '\n');
      Replacement = NotOp;
      break;
    }
    case ICmpInst::ICMP_NE:
      LLVM_DEBUG(dbgs() << "Replacing ICMP_NE with ballot argument: "
                        << *BallotArg << '\n');
      Replacement = BallotArg;
      break;
    default:
      // Any other predicate is left untouched.
      continue;
    }

    Cmp->replaceAllUsesWith(Replacement);
    Cmp->eraseFromParent();
    MadeChange = true;
  }

  // Drop the ballot once nothing refers to it any more.
  if (II.use_empty())
    II.eraseFromParent();
  return MadeChange;
}

/// Simplifies a single AMDGPU lane intrinsic call whose first argument is not
/// divergent at the call, according to \p UI.
///
/// \param II Call to one of amdgcn_permlane64, amdgcn_readfirstlane,
///           amdgcn_readlane or amdgcn_ballot. Any other intrinsic is a
///           programming error. \p II may be erased by this function.
/// \param UI Uniformity information for the function containing \p II.
/// \returns true if the IR was changed.
static bool optimizeUniformIntrinsic(IntrinsicInst &II,
                                     const UniformityInfo &UI) {
  switch (II.getIntrinsicID()) {
  case Intrinsic::amdgcn_permlane64:
  case Intrinsic::amdgcn_readfirstlane:
  case Intrinsic::amdgcn_readlane:
    // Only the use of the first argument is checked for divergence.
    if (UI.isDivergentUse(II.getOperandUse(0)))
      return false;
    return forwardUniformLaneSource(II);
  case Intrinsic::amdgcn_ballot:
    if (UI.isDivergentUse(II.getOperandUse(0)))
      return false;
    return foldUniformBallotCompares(II);
  default:
    llvm_unreachable("Unexpected intrinsic ID in optimizeUniformIntrinsic");
  }
}
|
||
/// Visits every call to the lane intrinsics handled by this pass
/// (amdgcn_permlane64, amdgcn_readfirstlane, amdgcn_readlane, amdgcn_ballot)
/// in \p M and tries to simplify each one with optimizeUniformIntrinsic().
///
/// \param M  Module whose intrinsic declarations are scanned.
/// \param AM Module analysis manager; used to reach the function analysis
///           manager that provides UniformityInfoAnalysis results.
/// \returns true if any call was simplified.
static bool runUniformIntrinsicCombine(Module &M, ModuleAnalysisManager &AM) {
  bool IsChanged = false;
  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  for (Function &F : M) {
    // Only the declarations of the handled intrinsics are of interest; their
    // users are the calls to optimize.
    switch (F.getIntrinsicID()) {
    case Intrinsic::amdgcn_permlane64:
    case Intrinsic::amdgcn_readfirstlane:
    case Intrinsic::amdgcn_readlane:
    case Intrinsic::amdgcn_ballot:
      break;
    default:
      continue;
    }

    // optimizeUniformIntrinsic() may erase the call it is given, which
    // unlinks that call from F's use list. Advance the iterator before each
    // visit so the walk never stands on a removed use.
    for (User *U : make_early_inc_range(F.users())) {
      auto *II = cast<IntrinsicInst>(U);
      Function *ParentF = II->getFunction();
      if (ParentF->isDeclaration())
        continue;

      const auto &UI = FAM.getResult<UniformityInfoAnalysis>(*ParentF);
      IsChanged |= optimizeUniformIntrinsic(*II, UI);
    }
  }
  return IsChanged;
}
|
||
/// New pass manager entry point: runs the uniform intrinsic combine over \p M.
///
/// \returns PreservedAnalyses::all() when nothing changed; otherwise a set in
///          which only UniformityInfoAnalysis is marked as preserved.
PreservedAnalyses
AMDGPUUniformIntrinsicCombinePass::run(Module &M, ModuleAnalysisManager &AM) {
  const bool Modified = runUniformIntrinsicCombine(M, AM);
  if (Modified) {
    PreservedAnalyses Preserved;
    Preserved.preserve<UniformityInfoAnalysis>();
    return Preserved;
  }

  // Untouched IR keeps every analysis valid.
  return PreservedAnalyses::all();
}
Uh oh!
There was an error while loading. Please reload this page.