Skip to content

Commit 5dfdd34

Browse files
committed
AMDGPU: Don't try to fold wavefrontsize intrinsic in libcall simplify
It's not a libcall, so it doesn't really belong here to begin with. Relying on checking the target name and explicit features isn't particularly sound either. The library doesn't use the intrinsic anymore, so it doesn't matter anyway.
1 parent eb00555 commit 5dfdd34

File tree

4 files changed

+33
-77
lines changed

4 files changed

+33
-77
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ FunctionPass *createSIPreAllocateWWMRegsPass();
4848
FunctionPass *createSIFormMemoryClausesPass();
4949

5050
FunctionPass *createSIPostRABundlerPass();
51-
FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *);
51+
FunctionPass *createAMDGPUSimplifyLibCallsPass();
5252
FunctionPass *createAMDGPUUseNativeCallsPass();
5353
ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
5454
FunctionPass *createAMDGPUCodeGenPreparePass();
@@ -60,11 +60,8 @@ FunctionPass *createSIModeRegisterPass();
6060
FunctionPass *createGCNPreRAOptimizationsPass();
6161

6262
struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
63-
AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {}
63+
AMDGPUSimplifyLibCallsPass() {}
6464
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
65-
66-
private:
67-
TargetMachine &TM;
6865
};
6966

7067
struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {

llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp

Lines changed: 9 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#include "llvm/IR/IntrinsicInst.h"
2121
#include "llvm/IR/IntrinsicsAMDGPU.h"
2222
#include "llvm/InitializePasses.h"
23-
#include "llvm/Target/TargetMachine.h"
2423
#include <cmath>
2524

2625
#define DEBUG_TYPE "amdgpu-simplifylib"
@@ -49,8 +48,6 @@ class AMDGPULibCalls {
4948

5049
typedef llvm::AMDGPULibFunc FuncInfo;
5150

52-
const TargetMachine *TM;
53-
5451
bool UnsafeFPMath = false;
5552

5653
// -fuse-native.
@@ -101,13 +98,11 @@ class AMDGPULibCalls {
10198
bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
10299
const FuncInfo &FInfo);
103100

104-
// llvm.amdgcn.wavefrontsize
105-
bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
106-
107101
// Get insertion point at entry.
108102
BasicBlock::iterator getEntryIns(CallInst * UI);
109103
// Insert an Alloc instruction.
110104
AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
105+
111106
// Get a scalar native builtin single argument FP function
112107
FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
113108

@@ -126,7 +121,7 @@ class AMDGPULibCalls {
126121
}
127122

128123
public:
129-
AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
124+
AMDGPULibCalls() {}
130125

131126
bool fold(CallInst *CI);
132127

@@ -148,8 +143,7 @@ namespace {
148143
public:
149144
static char ID; // Pass identification
150145

151-
AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr)
152-
: FunctionPass(ID), Simplifier(TM) {
146+
AMDGPUSimplifyLibCalls() : FunctionPass(ID) {
153147
initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
154148
}
155149

@@ -602,18 +596,8 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
602596
bool AMDGPULibCalls::fold(CallInst *CI) {
603597
Function *Callee = CI->getCalledFunction();
604598
// Ignore indirect calls.
605-
if (!Callee || CI->isNoBuiltin())
606-
return false;
607-
608-
IRBuilder<> B(CI);
609-
switch (Callee->getIntrinsicID()) {
610-
case Intrinsic::not_intrinsic:
611-
break;
612-
case Intrinsic::amdgcn_wavefrontsize:
613-
return !EnablePreLink && fold_wavefrontsize(CI, B);
614-
default:
599+
if (!Callee || Callee->isIntrinsic() || CI->isNoBuiltin())
615600
return false;
616-
}
617601

618602
FuncInfo FInfo;
619603
if (!parseFunctionName(Callee->getName(), FInfo))
@@ -629,6 +613,8 @@ bool AMDGPULibCalls::fold(CallInst *CI) {
629613
if (TDOFold(CI, FInfo))
630614
return true;
631615

616+
IRBuilder<> B(CI);
617+
632618
if (FPMathOperator *FPOp = dyn_cast<FPMathOperator>(CI)) {
633619
// Under unsafe-math, evaluate calls if possible.
634620
// According to Brian Sumner, we can do this for all f32 function calls
@@ -1310,28 +1296,6 @@ bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B,
13101296
return true;
13111297
}
13121298

1313-
bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
1314-
if (!TM)
1315-
return false;
1316-
1317-
StringRef CPU = TM->getTargetCPU();
1318-
StringRef Features = TM->getTargetFeatureString();
1319-
if ((CPU.empty() || CPU.equals_insensitive("generic")) &&
1320-
(Features.empty() || !Features.contains_insensitive("wavefrontsize")))
1321-
return false;
1322-
1323-
Function *F = CI->getParent()->getParent();
1324-
const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
1325-
unsigned N = ST.getWavefrontSize();
1326-
1327-
LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
1328-
<< N << "\n");
1329-
1330-
CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
1331-
CI->eraseFromParent();
1332-
return true;
1333-
}
1334-
13351299
// Get insertion point at entry.
13361300
BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
13371301
Function * Func = UI->getParent()->getParent();
@@ -1642,8 +1606,8 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
16421606
}
16431607

16441608
// Public interface to the Simplify LibCalls pass.
1645-
FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetMachine *TM) {
1646-
return new AMDGPUSimplifyLibCalls(TM);
1609+
FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass() {
1610+
return new AMDGPUSimplifyLibCalls();
16471611
}
16481612

16491613
FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
@@ -1677,7 +1641,7 @@ bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
16771641

16781642
PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F,
16791643
FunctionAnalysisManager &AM) {
1680-
AMDGPULibCalls Simplifier(&TM);
1644+
AMDGPULibCalls Simplifier;
16811645
Simplifier.initNativeFuncs();
16821646
Simplifier.initFunction(F);
16831647

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -631,7 +631,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
631631
[this](StringRef PassName, FunctionPassManager &PM,
632632
ArrayRef<PassBuilder::PipelineElement>) {
633633
if (PassName == "amdgpu-simplifylib") {
634-
PM.addPass(AMDGPUSimplifyLibCallsPass(*this));
634+
PM.addPass(AMDGPUSimplifyLibCallsPass());
635635
return true;
636636
}
637637
if (PassName == "amdgpu-usenative") {
@@ -683,11 +683,11 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
683683
});
684684

685685
PB.registerPipelineStartEPCallback(
686-
[this](ModulePassManager &PM, OptimizationLevel Level) {
686+
[](ModulePassManager &PM, OptimizationLevel Level) {
687687
FunctionPassManager FPM;
688688
FPM.addPass(AMDGPUUseNativeCallsPass());
689689
if (EnableLibCallSimplify && Level != OptimizationLevel::O0)
690-
FPM.addPass(AMDGPUSimplifyLibCallsPass(*this));
690+
FPM.addPass(AMDGPUSimplifyLibCallsPass());
691691
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
692692
});
693693

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,16 @@
44
; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s
55
; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
66

7-
; RUN: opt -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-WXX %s
8-
; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-WXX %s
9-
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
10-
; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
11-
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
12-
; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
13-
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
14-
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
15-
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
16-
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
7+
; RUN: opt -O3 -S < %s | FileCheck -check-prefix=OPT %s
8+
; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefix=OPT %s
9+
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
10+
; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
11+
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
12+
; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefix=OPT %s
13+
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
14+
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
15+
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
16+
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
1717

1818
; GCN-LABEL: {{^}}fold_wavefrontsize:
1919
; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize(
@@ -22,10 +22,8 @@
2222
; W64: v_mov_b32_e32 [[V:v[0-9]+]], 64
2323
; GCN: store_{{dword|b32}} v{{.+}}, [[V]]
2424

25-
; OPT-W32: store i32 32, ptr addrspace(1) %arg, align 4
26-
; OPT-W64: store i32 64, ptr addrspace(1) %arg, align 4
27-
; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
28-
; OPT-WXX: store i32 %tmp, ptr addrspace(1) %arg, align 4
25+
; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
26+
; OPT: store i32 %tmp, ptr addrspace(1) %arg, align 4
2927
; OPT-NEXT: ret void
3028

3129
define amdgpu_kernel void @fold_wavefrontsize(ptr addrspace(1) nocapture %arg) {
@@ -43,12 +41,10 @@ bb:
4341
; GCN-NOT: cndmask
4442
; GCN: store_{{dword|b32}} v{{.+}}, [[V]]
4543

46-
; OPT-W32: store i32 1, ptr addrspace(1) %arg, align 4
47-
; OPT-W64: store i32 2, ptr addrspace(1) %arg, align 4
48-
; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
49-
; OPT-WXX: %tmp1 = icmp ugt i32 %tmp, 32
50-
; OPT-WXX: %tmp2 = select i1 %tmp1, i32 2, i32 1
51-
; OPT-WXX: store i32 %tmp2, ptr addrspace(1) %arg
44+
; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
45+
; OPT: %tmp1 = icmp ugt i32 %tmp, 32
46+
; OPT: %tmp2 = select i1 %tmp1, i32 2, i32 1
47+
; OPT: store i32 %tmp2, ptr addrspace(1) %arg
5248
; OPT-NEXT: ret void
5349

5450
define amdgpu_kernel void @fold_and_optimize_wavefrontsize(ptr addrspace(1) nocapture %arg) {
@@ -64,10 +60,9 @@ bb:
6460
; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(
6561

6662
; OPT: bb:
67-
; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
68-
; OPT-WXX: %tmp1 = icmp ugt i32 %tmp, 32
69-
; OPT-WXX: bb3:
70-
; OPT-W64: store i32 1, ptr addrspace(1) %arg, align 4
63+
; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
64+
; OPT: %tmp1 = icmp ugt i32 %tmp, 32
65+
; OPT: bb3:
7166
; OPT-NEXT: ret void
7267

7368
define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(ptr addrspace(1) nocapture %arg) {

0 commit comments

Comments
 (0)