Skip to content

Commit 23431b5

Browse files
committed
[clang][CodeGen] Fix GPU-specific attributes being dropped by bitcode linking
Device libs make use of patterns like this: ``` __attribute__((target("gfx11-insts"))) static unsigned do_intrin_stuff(void) { return __builtin_amdgcn_s_sendmsg_rtnl(0x0); } ``` For functions that are assumed to be eliminated if the currennt GPU target doesn't support them. At O0 such functions aren't eliminated by common optimizations but often by AMDGPURemoveIncompatibleFunctions instead, which sees the "+gfx11-insts" attribute on, say, GFX9 and knows it's not valid, so it removes the function. D142907 accidentally made it so such attributes were dropped during bitcode linking, making it impossible for RemoveIncompatibleFunctions to catch the functions and causing ISel to catch fire eventually. This fixes the issue and adds a new test to ensure we don't accidentally fall into this trap again. Fixes SWDEV-403642 Reviewed By: arsenm, yaxunl Differential Revision: https://reviews.llvm.org/D152251
1 parent dcc8f94 commit 23431b5

File tree

6 files changed

+80
-14
lines changed

6 files changed

+80
-14
lines changed

clang/lib/CodeGen/CGCall.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2025,7 +2025,8 @@ void CodeGenModule::mergeDefaultFunctionDefinitionAttributes(
20252025
llvm::AttrBuilder FuncAttrs(F.getContext());
20262026
getTrivialDefaultFunctionAttributes(F.getName(), F.hasOptNone(),
20272027
/*AttrOnCallSite=*/false, FuncAttrs);
2028-
GetCPUAndFeaturesAttributes(GlobalDecl(), FuncAttrs);
2028+
GetCPUAndFeaturesAttributes(GlobalDecl(), FuncAttrs,
2029+
/*AddTargetFeatures=*/false);
20292030

20302031
if (!WillInternalize && F.isInterposable()) {
20312032
// Do not promote "dynamic" denormal-fp-math to this translation unit's

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2226,7 +2226,8 @@ void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) {
22262226
}
22272227

22282228
bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
2229-
llvm::AttrBuilder &Attrs) {
2229+
llvm::AttrBuilder &Attrs,
2230+
bool SetTargetFeatures) {
22302231
// Add target-cpu and target-features attributes to functions. If
22312232
// we have a decl for the function and it has a target attribute then
22322233
// parse that and add it to the feature set.
@@ -2286,7 +2287,7 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
22862287
Attrs.addAttribute("tune-cpu", TuneCPU);
22872288
AddedAttr = true;
22882289
}
2289-
if (!Features.empty()) {
2290+
if (!Features.empty() && SetTargetFeatures) {
22902291
llvm::sort(Features);
22912292
Attrs.addAttribute("target-features", llvm::join(Features, ","));
22922293
AddedAttr = true;

clang/lib/CodeGen/CodeGenModule.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1583,7 +1583,8 @@ class CodeGenModule : public CodeGenTypeCache {
15831583
ForDefinition_t IsForDefinition = NotForDefinition);
15841584

15851585
bool GetCPUAndFeaturesAttributes(GlobalDecl GD,
1586-
llvm::AttrBuilder &AttrBuilder);
1586+
llvm::AttrBuilder &AttrBuilder,
1587+
bool SetTargetFeatures = true);
15871588
void setNonAliasAttributes(GlobalDecl GD, llvm::GlobalObject *GO);
15881589

15891590
/// Set function attributes for a function declaration.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
typedef unsigned long ulong;
2+
3+
__attribute__((target("gfx11-insts")))
4+
ulong do_intrin_stuff(void)
5+
{
6+
return __builtin_amdgcn_s_sendmsg_rtnl(0x0);
7+
}

clang/test/CodeGenCUDA/link-builtin-bitcode-denormal-fp-mode.cu

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -132,24 +132,32 @@ __global__ void kernel_f64(double* out, double* a, double* b, double* c) {
132132

133133
// Default mode relies on the implicit check-not for the denormal-fp-math.
134134

135-
// PSZ: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} }
136-
// PSZ: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} }
137-
// PSZ: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} }
135+
// PSZ: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign"
136+
// PSZ-SAME: "target-cpu"="gfx803"
137+
// PSZ: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
138+
// PSZ-SAME: "target-cpu"="gfx803"
139+
// PSZ: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
140+
// PSZ-SAME: "target-cpu"="gfx803"
138141

139142
// FIXME: Should check-not "denormal-fp-math" within the line
140-
// IEEEF64-PSZF32: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} }
141-
// IEEEF64-PSZF32: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} }
142-
// IEEEF64-PSZF32: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} }
143+
// IEEEF64-PSZF32: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
144+
// IEEEF64-PSZF32-SAME: "target-cpu"="gfx803"
145+
// IEEEF64-PSZF32: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
146+
// IEEEF64-PSZF32-SAME: "target-cpu"="gfx803"
147+
// IEEEF64-PSZF32: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
148+
// IEEEF64-PSZF32-SAME: "target-cpu"="gfx803"
143149

144150
// IEEEF32-PSZF64-DYNF32: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} }
145151
// implicit check-not
146152
// implicit check-not
147153

148154

149-
// IEEEF32-PSZF64-DYNFULL: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} }
150-
// IEEEF32-PSZF64-DYNFULL: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} }
151-
// IEEEF32-PSZF64-DYNFULL: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} }
152-
155+
// IEEEF32-PSZF64-DYNFULL: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee"
156+
// IEEEF32-PSZF64-DYNFULL-SAME: "target-cpu"="gfx803"
157+
// IEEEF32-PSZF64-DYNFULL: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee"
158+
// IEEEF32-PSZF64-DYNFULL-SAME: "target-cpu"="gfx803"
159+
// IEEEF32-PSZF64-DYNFULL: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee"
160+
// IEEEF32-PSZF64-DYNFULL-SAME: "target-cpu"="gfx803"
153161

154162
// -mlink-bitcode-file doesn't internalize or propagate attributes.
155163
// NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} }
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Verify the behavior of the +gfxN-insts in the way that
2+
// rocm-device-libs should be built with. e.g. If the device libraries has a function
3+
// with "+gfx11-insts", that attribute should still be present after linking and not
4+
// overwritten with the current target's settings.
5+
6+
// This is important because at this time, many device-libs functions that are only
7+
// available on some GPUs put an attribute such as "+gfx11-insts" so that
8+
// AMDGPURemoveIncompatibleFunctions can detect & remove them if needed.
9+
10+
// Build the fake device library in the way rocm-device-libs should be built.
11+
//
12+
// RUN: %clang_cc1 -x cl -triple amdgcn-amd-amdhsa\
13+
// RUN: -mcode-object-version=none -emit-llvm-bc \
14+
// RUN: %S/Inputs/ocml-sample-target-attrs.cl -o %t.bc
15+
16+
// Check the default behavior
17+
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 -fcuda-is-device \
18+
// RUN: -mlink-builtin-bitcode %t.bc \
19+
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,INTERNALIZE
20+
21+
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx1101 -fcuda-is-device \
22+
// RUN: -mlink-builtin-bitcode %t.bc -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,INTERNALIZE
23+
24+
// Check the case where no internalization is performed
25+
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \
26+
// RUN: -fcuda-is-device -mlink-bitcode-file %t.bc -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,NOINTERNALIZE
27+
28+
// Check the case where no internalization is performed
29+
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx1101 \
30+
// RUN: -fcuda-is-device -mlink-bitcode-file %t.bc -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,NOINTERNALIZE
31+
32+
33+
// CHECK: define {{.*}} i64 @do_intrin_stuff() #[[ATTR:[0-9]+]]
34+
// INTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-cpu"="gfx{{.*}}" "target-features"="+gfx11-insts"
35+
// NOINTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-features"="+gfx11-insts"
36+
37+
#define __device__ __attribute__((device))
38+
#define __global__ __attribute__((global))
39+
40+
typedef unsigned long ulong;
41+
42+
extern "C" {
43+
__device__ ulong do_intrin_stuff(void);
44+
45+
__global__ void kernel_f16(ulong* out) {
46+
*out = do_intrin_stuff();
47+
}
48+
}

0 commit comments

Comments
 (0)