Skip to content

Commit 0adb3eb

Browse files
committed
AMDGPU: Move libcall simplify into PeepholeEP
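Previously, the libcall simplification passes ran at the start of the pipeline, directly on the incoming IR, which is still littered with temporary allocas that obscure trivial values. They need to run after the initial SROA to handle sincos insertion, so they are now registered at the peephole extension point. Fixes: SWDEV-456865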
1 parent 46e4256 commit 0adb3eb

File tree

2 files changed

+14
-6
lines changed

2 files changed

+14
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -655,9 +655,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(
655655
PB.registerPipelineStartEPCallback(
656656
[](ModulePassManager &PM, OptimizationLevel Level) {
657657
FunctionPassManager FPM;
658-
FPM.addPass(AMDGPUUseNativeCallsPass());
659-
if (EnableLibCallSimplify && Level != OptimizationLevel::O0)
660-
FPM.addPass(AMDGPUSimplifyLibCallsPass());
661658
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
662659
if (EnableHipStdPar)
663660
PM.addPass(HipStdParAcceleratorCodeSelectionPass());
@@ -681,6 +678,16 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(
681678
PM.addPass(AMDGPUAlwaysInlinePass());
682679
});
683680

681+
PB.registerPeepholeEPCallback(
682+
[](FunctionPassManager &FPM, OptimizationLevel Level) {
683+
if (Level == OptimizationLevel::O0)
684+
return;
685+
686+
FPM.addPass(AMDGPUUseNativeCallsPass());
687+
if (EnableLibCallSimplify)
688+
FPM.addPass(AMDGPUSimplifyLibCallsPass());
689+
});
690+
684691
PB.registerCGSCCOptimizerLateEPCallback(
685692
[this](CGSCCPassManager &PM, OptimizationLevel Level) {
686693
if (Level == OptimizationLevel::O0)

llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@ define protected amdgpu_kernel void @swdev456865(ptr addrspace(1) %out0, ptr add
1212
; CHECK-LABEL: define protected amdgpu_kernel void @swdev456865(
1313
; CHECK-SAME: ptr addrspace(1) nocapture writeonly [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
1414
; CHECK-NEXT: entry:
15-
; CHECK-NEXT: [[I_I:%.*]] = tail call float asm "pseudo-libcall-sin [[TMP0:%.*]], %1", "=v,v"(float noundef [[X]]) #[[ATTR1:[0-9]+]]
16-
; CHECK-NEXT: [[I_I1:%.*]] = tail call float asm "pseudo-libcall-cos [[TMP0]], %1", "=v,v"(float noundef [[X]]) #[[ATTR1]]
17-
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[I_I]], [[I_I1]]
15+
; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5)
16+
; CHECK-NEXT: [[I_I:%.*]] = call float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) #[[ATTR1:[0-9]+]]
17+
; CHECK-NEXT: [[I_I2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4
18+
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[I_I]], [[I_I2]]
1819
; CHECK-NEXT: [[CONV:%.*]] = fpext float [[X]] to double
1920
; CHECK-NEXT: [[CONV5:%.*]] = fpext float [[ADD]] to double
2021
; CHECK-NEXT: store double [[CONV]], ptr addrspace(1) [[OUT0]], align 8

0 commit comments

Comments
 (0)