Skip to content

Commit cc94771

Browse files
committed
[CUDA][HIP] add __builtin_get_device_side_mangled_name
Add builtin function __builtin_get_device_side_mangled_name to get device side manged name for functions and global variables, which can be used to get symbol address of kernels or variables by mangled name in dynamically loaded bundled code objects at run time. Reviewed by: Artem Belevich Differential Revision: https://reviews.llvm.org/D99301
1 parent dc928e9 commit cc94771

File tree

9 files changed

+101
-3
lines changed

9 files changed

+101
-3
lines changed

clang/include/clang/Basic/Builtins.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1639,6 +1639,9 @@ BUILTIN(__builtin_os_log_format, "v*v*cC*.", "p:0:nt")
16391639
// OpenMP 4.0
16401640
LANGBUILTIN(omp_is_initial_device, "i", "nc", OMP_LANG)
16411641

1642+
// CUDA/HIP
1643+
LANGBUILTIN(__builtin_get_device_side_mangled_name, "cC*.", "ncT", CUDA_LANG)
1644+
16421645
// Builtins for XRay
16431646
BUILTIN(__xray_customevent, "vcC*z", "")
16441647
BUILTIN(__xray_typedevent, "vzcC*z", "")

clang/include/clang/Basic/Builtins.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ enum LanguageID {
3636
OCLC20_LANG = 0x20, // builtin for OpenCL C 2.0 only.
3737
OCLC1X_LANG = 0x40, // builtin for OpenCL C 1.x only.
3838
OMP_LANG = 0x80, // builtin requires OpenMP.
39+
CUDA_LANG = 0x100, // builtin requires CUDA.
3940
ALL_LANGUAGES = C_LANG | CXX_LANG | OBJC_LANG, // builtin for all languages.
4041
ALL_GNU_LANGUAGES = ALL_LANGUAGES | GNU_LANG, // builtin requires GNU mode.
4142
ALL_MS_LANGUAGES = ALL_LANGUAGES | MS_LANG, // builtin requires MS mode.

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8303,6 +8303,9 @@ def note_cuda_device_builtin_surftex_should_be_template_class : Note<
83038303
"%0 needs to be instantiated from a class template with proper "
83048304
"template arguments">;
83058305

8306+
def err_hip_invalid_args_builtin_mangled_name : Error<
8307+
"invalid argument: symbol must be a device-side function or global variable">;
8308+
83068309
def warn_non_pod_vararg_with_format_string : Warning<
83078310
"cannot pass %select{non-POD|non-trivial}0 object of type %1 to variadic "
83088311
"%select{function|block|method|constructor}2; expected type from format "

clang/lib/Basic/Builtins.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,13 @@ bool Builtin::Context::builtinIsSupported(const Builtin::Info &BuiltinInfo,
7575
bool OclCUnsupported = !LangOpts.OpenCL &&
7676
(BuiltinInfo.Langs & ALL_OCLC_LANGUAGES);
7777
bool OpenMPUnsupported = !LangOpts.OpenMP && BuiltinInfo.Langs == OMP_LANG;
78+
bool CUDAUnsupported = !LangOpts.CUDA && BuiltinInfo.Langs == CUDA_LANG;
7879
bool CPlusPlusUnsupported =
7980
!LangOpts.CPlusPlus && BuiltinInfo.Langs == CXX_LANG;
8081
return !BuiltinsUnsupported && !MathBuiltinsUnsupported && !OclCUnsupported &&
8182
!OclC1Unsupported && !OclC2Unsupported && !OpenMPUnsupported &&
8283
!GnuModeUnsupported && !MSModeUnsupported && !ObjCUnsupported &&
83-
!CPlusPlusUnsupported;
84+
!CPlusPlusUnsupported && !CUDAUnsupported;
8485
}
8586

8687
/// initializeBuiltins - Mark the identifiers for all the builtins with their

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
#include "CGCUDARuntime.h"
1314
#include "CGCXXABI.h"
1415
#include "CGObjCRuntime.h"
1516
#include "CGOpenCLRuntime.h"
@@ -5058,6 +5059,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
50585059
Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
50595060
return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
50605061
}
5062+
5063+
case Builtin::BI__builtin_get_device_side_mangled_name: {
5064+
auto Name = CGM.getCUDARuntime().getDeviceSideName(
5065+
cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
5066+
auto Str = CGM.GetAddrOfConstantCString(Name, "");
5067+
llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
5068+
llvm::ConstantInt::get(SizeTy, 0)};
5069+
auto *Ptr = llvm::ConstantExpr::getGetElementPtr(Str.getElementType(),
5070+
Str.getPointer(), Zeros);
5071+
return RValue::get(Ptr);
5072+
}
50615073
}
50625074

50635075
// If this is an alias for a lib function (e.g. __builtin_sin), emit

clang/lib/CodeGen/CGCUDANV.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
//===----------------------------------------------------------------------===//
1313

1414
#include "CGCUDARuntime.h"
15+
#include "CGCXXABI.h"
1516
#include "CodeGenFunction.h"
1617
#include "CodeGenModule.h"
1718
#include "clang/AST/Decl.h"
@@ -260,10 +261,15 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
260261
else
261262
GD = GlobalDecl(ND);
262263
std::string DeviceSideName;
263-
if (DeviceMC->shouldMangleDeclName(ND)) {
264+
MangleContext *MC;
265+
if (CGM.getLangOpts().CUDAIsDevice)
266+
MC = &CGM.getCXXABI().getMangleContext();
267+
else
268+
MC = DeviceMC.get();
269+
if (MC->shouldMangleDeclName(ND)) {
264270
SmallString<256> Buffer;
265271
llvm::raw_svector_ostream Out(Buffer);
266-
DeviceMC->mangleName(GD, Out);
272+
MC->mangleName(GD, Out);
267273
DeviceSideName = std::string(Out.str());
268274
} else
269275
DeviceSideName = std::string(ND->getIdentifier()->getName());

clang/lib/Sema/SemaChecking.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1966,6 +1966,26 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
19661966

19671967
case Builtin::BI__builtin_matrix_column_major_store:
19681968
return SemaBuiltinMatrixColumnMajorStore(TheCall, TheCallResult);
1969+
1970+
case Builtin::BI__builtin_get_device_side_mangled_name: {
1971+
auto Check = [](CallExpr *TheCall) {
1972+
if (TheCall->getNumArgs() != 1)
1973+
return false;
1974+
auto *DRE = dyn_cast<DeclRefExpr>(TheCall->getArg(0)->IgnoreImpCasts());
1975+
if (!DRE)
1976+
return false;
1977+
auto *D = DRE->getDecl();
1978+
if (!isa<FunctionDecl>(D) && !isa<VarDecl>(D))
1979+
return false;
1980+
return D->hasAttr<CUDAGlobalAttr>() || D->hasAttr<CUDADeviceAttr>() ||
1981+
D->hasAttr<CUDAConstantAttr>() || D->hasAttr<HIPManagedAttr>();
1982+
};
1983+
if (!Check(TheCall)) {
1984+
Diag(TheCall->getBeginLoc(),
1985+
diag::err_hip_invalid_args_builtin_mangled_name);
1986+
return ExprError();
1987+
}
1988+
}
19691989
}
19701990

19711991
// Since the target specific builtins for each arch overlap, only check those
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux -aux-triple amdgcn-amd-amdhsa \
2+
// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefixes=CHECK,LNX %s
3+
// RUN: %clang_cc1 -triple x86_64-unknown-windows-msvc -aux-triple amdgcn-amd-amdhsa \
4+
// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefixes=CHECK,MSVC %s
5+
6+
#include "Inputs/cuda.h"
7+
8+
namespace X {
9+
__global__ void kern1(int *x);
10+
__device__ int var1;
11+
}
12+
13+
// CHECK: @[[STR1:.*]] = {{.*}} c"_ZN1X5kern1EPi\00"
14+
// CHECK: @[[STR2:.*]] = {{.*}} c"_ZN1X4var1E\00"
15+
16+
// LNX-LABEL: define {{.*}}@_Z4fun1v()
17+
// MSVC-LABEL: define {{.*}} @"?fun1@@YAPEBDXZ"()
18+
// CHECK: ret i8* getelementptr inbounds ({{.*}} @[[STR1]], i64 0, i64 0)
19+
const char *fun1() {
20+
return __builtin_get_device_side_mangled_name(X::kern1);
21+
}
22+
23+
// LNX-LABEL: define {{.*}}@_Z4fun2v()
24+
// MSVC-LABEL: define {{.*}}@"?fun2@@YAPEBDXZ"()
25+
// CHECK: ret i8* getelementptr inbounds ({{.*}} @[[STR2]], i64 0, i64 0)
26+
__host__ __device__ const char *fun2() {
27+
return __builtin_get_device_side_mangled_name(X::var1);
28+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux -aux-triple amdgcn-amd-amdhsa \
2+
// RUN: -verify -fsyntax-only -x hip %s
3+
4+
#include "Inputs/cuda.h"
5+
6+
__global__ void kern1();
7+
int y;
8+
9+
void fun1() {
10+
int x;
11+
const char *p;
12+
p = __builtin_get_device_side_mangled_name();
13+
// expected-error@-1 {{invalid argument: symbol must be a device-side function or global variable}}
14+
p = __builtin_get_device_side_mangled_name(kern1, kern1);
15+
// expected-error@-1 {{invalid argument: symbol must be a device-side function or global variable}}
16+
p = __builtin_get_device_side_mangled_name(1);
17+
// expected-error@-1 {{invalid argument: symbol must be a device-side function or global variable}}
18+
p = __builtin_get_device_side_mangled_name(x);
19+
// expected-error@-1 {{invalid argument: symbol must be a device-side function or global variable}}
20+
p = __builtin_get_device_side_mangled_name(fun1);
21+
// expected-error@-1 {{invalid argument: symbol must be a device-side function or global variable}}
22+
p = __builtin_get_device_side_mangled_name(y);
23+
// expected-error@-1 {{invalid argument: symbol must be a device-side function or global variable}}
24+
}

0 commit comments

Comments
 (0)