Skip to content

Commit 03375a3

Browse files
committed
[HIP] [AlwaysInliner] Disable AlwaysInliner to eliminate undefined symbols
By default clang emits complete contructors as alias of base constructors if they are the same. The backend is supposed to emit symbols for the alias, otherwise it causes undefined symbols. @yaxunl observed that this issue is related to the llvm options `-amdgpu-early-inline-all=true` and `-amdgpu-function-calls=false`. This issue is resolved by only inlining global values with internal linkage. The `getCalleeFunction()` in AMDGPUResourceUsageAnalysis also had to be extended to support aliases to functions. inline-calls.ll was corrected appropriately. Reviewed By: yaxunl, #amdgpu Differential Revision: https://reviews.llvm.org/D109707
1 parent 4e572db commit 03375a3

File tree

5 files changed

+37
-11
lines changed

5 files changed

+37
-11
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5089,9 +5089,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
50895089
}
50905090

50915091
// Enable -mconstructor-aliases except on darwin, where we have to work around
5092-
// a linker bug (see <rdar://problem/7651567>), and CUDA/AMDGPU device code,
5093-
// where aliases aren't supported.
5094-
if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX() && !RawTriple.isAMDGPU())
5092+
// a linker bug (see <rdar://problem/7651567>), and CUDA device code, where
5093+
// aliases aren't supported.
5094+
if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX())
50955095
CmdArgs.push_back("-mconstructor-aliases");
50965096

50975097
// Darwin's kernel doesn't support guard variables; just die if we
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// REQUIRES: amdgpu-registered-target, clang-driver
2+
3+
// RUN: %clang --offload-arch=gfx906 --cuda-device-only -nogpulib -nogpuinc -x hip -emit-llvm -S -o - %s \
4+
// RUN: -fgpu-rdc -O3 -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false | \
5+
// RUN: FileCheck %s
6+
7+
#include "Inputs/cuda.h"
8+
9+
// CHECK: %struct.B = type { i8 }
10+
struct B {
11+
12+
// CHECK: @_ZN1BC1Ei = hidden unnamed_addr alias void (%struct.B*, i32), void (%struct.B*, i32)* @_ZN1BC2Ei
13+
__device__ B(int x);
14+
};
15+
16+
__device__ B::B(int x) {
17+
}

llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "AMDGPU.h"
1616
#include "AMDGPUTargetMachine.h"
1717
#include "Utils/AMDGPUBaseInfo.h"
18+
#include "llvm/CodeGen/CommandFlags.h"
1819
#include "llvm/IR/Module.h"
1920
#include "llvm/Pass.h"
2021
#include "llvm/Support/CommandLine.h"
@@ -90,9 +91,13 @@ static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
9091

9192
SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
9293
SmallPtrSet<Function *, 8> FuncsToNoInline;
94+
Triple TT(M.getTargetTriple());
9395

9496
for (GlobalAlias &A : M.aliases()) {
9597
if (Function* F = dyn_cast<Function>(A.getAliasee())) {
98+
if (TT.getArch() == Triple::amdgcn &&
99+
A.getLinkage() != GlobalValue::InternalLinkage)
100+
continue;
96101
A.replaceAllUsesWith(F);
97102
AliasesToRemove.push_back(&A);
98103
}

llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
#include "SIMachineFunctionInfo.h"
3030
#include "llvm/Analysis/CallGraph.h"
3131
#include "llvm/CodeGen/TargetPassConfig.h"
32+
#include "llvm/IR/GlobalAlias.h"
33+
#include "llvm/IR/GlobalValue.h"
3234
#include "llvm/Target/TargetMachine.h"
3335

3436
using namespace llvm;
@@ -61,7 +63,8 @@ static const Function *getCalleeFunction(const MachineOperand &Op) {
6163
assert(Op.getImm() == 0);
6264
return nullptr;
6365
}
64-
66+
if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
67+
return cast<Function>(GA->getOperand(0));
6568
return cast<Function>(Op.getGlobal());
6669
}
6770

llvm/test/CodeGen/AMDGPU/inline-calls.ll

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
2-
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
3-
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
1+
; RUN: llc -mtriple amdgcn-unknown-linux-gnu -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
2+
; RUN: llc -mtriple amdgcn-unknown-linux-gnu -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
3+
; RUN: llc -mtriple r600-unknown-linux-gnu -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s --check-prefix=R600
44

55
; ALL-NOT: {{^}}func:
66
define internal i32 @func(i32 %a) {
@@ -9,7 +9,7 @@ entry:
99
ret i32 %tmp0
1010
}
1111

12-
; ALL: {{^}}kernel:
12+
; CHECK: {{^}}kernel:
1313
; GCN-NOT: s_swappc_b64
1414
define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) {
1515
entry:
@@ -18,12 +18,13 @@ entry:
1818
ret void
1919
}
2020

21-
; CHECK-NOT: func_alias
22-
; ALL-NOT: func_alias
21+
; CHECK: func_alias
22+
; R600-NOT: func_alias
2323
@func_alias = alias i32 (i32), i32 (i32)* @func
2424

25-
; ALL: {{^}}kernel3:
25+
; CHECK-NOT: {{^}}kernel3:
2626
; GCN-NOT: s_swappc_b64
27+
; R600: {{^}}kernel3:
2728
define amdgpu_kernel void @kernel3(i32 addrspace(1)* %out) {
2829
entry:
2930
%tmp0 = call i32 @func_alias(i32 1)

0 commit comments

Comments
 (0)