Skip to content

Commit d735c89

Browse files
tahonermann and elizabethandrews
authored and committed
[SYCL] Basic code generation for SYCL kernel caller offload entry point functions. (llvm#133030)
A function declared with the `sycl_kernel_entry_point` attribute, sometimes called a SYCL kernel entry point function, specifies a pattern from which the parameters and body of an offload entry point function, sometimes called a SYCL kernel caller function, are derived.

SYCL kernel caller functions are emitted during SYCL device compilation. Their parameters and body are derived from the `SYCLKernelCallStmt` statement and `OutlinedFunctionDecl` declaration associated with their corresponding SYCL kernel entry point function. A distinct SYCL kernel caller function is generated for each SYCL kernel entry point function defined as a non-inline function or ODR-used in the translation unit.

The name of each SYCL kernel caller function is parameterized by the SYCL kernel name type specified by the `sycl_kernel_entry_point` attribute attached to the corresponding SYCL kernel entry point function. For the moment, the Itanium ABI mangled name for typeinfo data (`_ZTS<type>`) is used to name these functions; a future change will switch to a more appropriate naming scheme.

The calling convention used for a SYCL kernel caller function is target dependent. Support for AMDGCN, NVPTX, and SPIR targets is currently provided. These functions are required to observe the language restrictions for SYCL devices as specified by the SYCL 2020 specification; this includes a forward progress guarantee and prohibits recursion.

Only SYCL kernel caller functions, functions declared as `SYCL_EXTERNAL`, and functions directly or indirectly referenced from those functions should be emitted during device compilation. Pruning of other declarations has not yet been implemented.

---------

Co-authored-by: Elizabeth Andrews <[email protected]>
1 parent 30ab41c commit d735c89

File tree

10 files changed

+355
-6
lines changed

10 files changed

+355
-6
lines changed

clang/include/clang/AST/SYCLKernelInfo.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,23 @@ namespace clang {
2222
class SYCLKernelInfo {
2323
public:
2424
SYCLKernelInfo(CanQualType KernelNameType,
25-
const FunctionDecl *KernelEntryPointDecl)
25+
const FunctionDecl *KernelEntryPointDecl,
26+
const std::string &KernelName)
2627
: KernelNameType(KernelNameType),
27-
KernelEntryPointDecl(KernelEntryPointDecl) {}
28+
KernelEntryPointDecl(KernelEntryPointDecl), KernelName(KernelName) {}
2829

2930
CanQualType getKernelNameType() const { return KernelNameType; }
3031

3132
const FunctionDecl *getKernelEntryPointDecl() const {
3233
return KernelEntryPointDecl;
3334
}
3435

36+
/// Returns the name generated for this kernel's SYCL kernel caller offload
/// entry point function. The returned reference refers to the string stored
/// in this object.
const std::string &GetKernelName() const { return KernelName; }
37+
3538
private:
3639
CanQualType KernelNameType;
3740
const FunctionDecl *KernelEntryPointDecl;
41+
// Generated name of the SYCL kernel caller offload entry point function;
// stored by value (copied from the constructor argument).
std::string KernelName;
3842
};
3943

4044
} // namespace clang

clang/lib/AST/ASTContext.cpp

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12825,6 +12825,15 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) {
1282512825
if (!FD->doesThisDeclarationHaveABody())
1282612826
return FD->doesDeclarationForceExternallyVisibleDefinition();
1282712827

12828+
// Function definitions with the sycl_kernel_entry_point attribute are
12829+
// required during device compilation so that SYCL kernel caller offload
12830+
// entry points are emitted.
12831+
if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelEntryPointAttr>())
12832+
return true;
12833+
12834+
// FIXME: Functions declared with SYCL_EXTERNAL are required during
12835+
// device compilation.
12836+
1282812837
// Constructors and destructors are required.
1282912838
if (FD->hasAttr<ConstructorAttr>() || FD->hasAttr<DestructorAttr>())
1283012839
return true;
@@ -14832,9 +14841,36 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
1483214841
}
1483314842
}
1483414843

14835-
static SYCLKernelInfo BuildSYCLKernelInfo(CanQualType KernelNameType,
14844+
/// Build the SYCLKernelInfo for the kernel identified by KernelNameType whose
/// entry point function is FD. The kernel name stored in the result is
/// obtained by mangling KernelNameType with an Itanium mangle context created
/// from Context.
static SYCLKernelInfo BuildSYCLKernelInfo(ASTContext &Context,
14845+
CanQualType KernelNameType,
1483614846
const FunctionDecl *FD) {
14837-
return {KernelNameType, FD};
14847+
// Host and device compilation may use different ABIs and different ABIs
14848+
// may allocate name mangling discriminators differently. A discriminator
14849+
// override is used to ensure consistent discriminator allocation across
14850+
// host and device compilation.
14851+
// Only lambda closure types receive an override (their device lambda
// mangling number); every other declaration yields std::nullopt. The Ctx
// parameter is not used by the lambda body.
// NOTE(review): std::nullopt presumably leaves the mangler's default
// discriminator in effect -- confirm against ItaniumMangleContext.
auto DeviceDiscriminatorOverrider =
14852+
[](ASTContext &Ctx, const NamedDecl *ND) -> UnsignedOrNone {
14853+
if (const auto *RD = dyn_cast<CXXRecordDecl>(ND))
14854+
if (RD->isLambda())
14855+
return RD->getDeviceLambdaManglingNumber();
14856+
return std::nullopt;
14857+
};
14858+
std::unique_ptr<MangleContext> MC{ItaniumMangleContext::create(
14859+
Context, Context.getDiagnostics(), DeviceDiscriminatorOverrider)};
14860+
14861+
// Construct a mangled name for the SYCL kernel caller offload entry point.
14862+
// FIXME: The Itanium typeinfo mangling (_ZTS<type>) is currently used to
14863+
// name the SYCL kernel caller offload entry point function. This mangling
14864+
// does not suffice to clearly identify symbols that correspond to SYCL
14865+
// kernel caller functions, nor is this mangling natural for targets that
14866+
// use a non-Itanium ABI.
14867+
std::string Buffer;
14868+
// 128 is only an initial capacity hint for the mangled name, not a limit.
Buffer.reserve(128);
14869+
// Out is constructed over Buffer and receives the mangled name.
llvm::raw_string_ostream Out(Buffer);
14870+
MC->mangleCanonicalTypeName(KernelNameType, Out);
14871+
std::string KernelName = Out.str();
14872+
14873+
return {KernelNameType, FD, KernelName};
1483814874
}
1483914875

1484014876
void ASTContext::registerSYCLEntryPointFunction(FunctionDecl *FD) {
@@ -14855,8 +14891,8 @@ void ASTContext::registerSYCLEntryPointFunction(FunctionDecl *FD) {
1485514891
declaresSameEntity(FD, IT->second.getKernelEntryPointDecl())) &&
1485614892
"SYCL kernel name conflict");
1485714893
(void)IT;
14858-
SYCLKernels.insert(
14859-
std::make_pair(KernelNameType, BuildSYCLKernelInfo(KernelNameType, FD)));
14894+
SYCLKernels.insert(std::make_pair(
14895+
KernelNameType, BuildSYCLKernelInfo(*this, KernelNameType, FD)));
1486014896
}
1486114897

1486214898
const SYCLKernelInfo &ASTContext::getSYCLKernelInfo(QualType T) const {

clang/lib/CodeGen/CGCall.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,17 @@ CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType,
739739
RequiredArgs::All);
740740
}
741741

742+
/// Arrange the function info for a SYCL kernel caller function that returns
/// resultType and whose parameter types are taken from args. All arguments
/// are required (RequiredArgs::All), no per-parameter info is supplied, and
/// the CC_OpenCLKernel calling convention is requested.
/// NOTE(review): CC_OpenCLKernel is presumably mapped to the target's kernel
/// calling convention (e.g. via getOpenCLKernelCallingConv) -- confirm.
const CGFunctionInfo &
743+
CodeGenTypes::arrangeSYCLKernelCallerDeclaration(QualType resultType,
744+
const FunctionArgList &args) {
745+
CanQualTypeList argTypes = getArgTypesForDeclaration(Context, args);
746+
747+
return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None,
748+
argTypes,
749+
FunctionType::ExtInfo(CC_OpenCLKernel),
750+
/*paramInfos=*/{}, RequiredArgs::All);
751+
}
752+
742753
/// Arrange a call to a C++ method, passing the given arguments.
743754
///
744755
/// numPrefixArgs is the number of ABI-specific prefix arguments we have. It

clang/lib/CodeGen/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ add_clang_library(clangCodeGen
102102
CodeGenFunction.cpp
103103
CodeGenModule.cpp
104104
CodeGenPGO.cpp
105+
CodeGenSYCL.cpp
105106
CodeGenTBAA.cpp
106107
CodeGenTypes.cpp
107108
ConstantInitBuilder.cpp

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3309,6 +3309,27 @@ void CodeGenModule::EmitDeferred() {
33093309
CurDeclsToEmit.swap(DeferredDeclsToEmit);
33103310

33113311
for (GlobalDecl &D : CurDeclsToEmit) {
3312+
// Functions declared with the sycl_kernel_entry_point attribute are
3313+
// emitted normally during host compilation. During device compilation,
3314+
// a SYCL kernel caller offload entry point function is generated and
3315+
// emitted in place of each of these functions.
3316+
if (const auto *FD = D.getDecl()->getAsFunction()) {
3317+
if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelEntryPointAttr>() &&
3318+
FD->isDefined()) {
3319+
// Functions with an invalid sycl_kernel_entry_point attribute are
3320+
// ignored during device compilation.
3321+
if (!FD->getAttr<SYCLKernelEntryPointAttr>()->isInvalidAttr()) {
3322+
// Generate and emit the SYCL kernel caller function.
3323+
EmitSYCLKernelCaller(FD, getContext());
3324+
// Recurse to emit any symbols directly or indirectly referenced
3325+
// by the SYCL kernel caller function.
3326+
EmitDeferred();
3327+
}
3328+
// Do not emit the sycl_kernel_entry_point attributed function.
3329+
continue;
3330+
}
3331+
}
3332+
33123333
// We should call GetAddrOfGlobal with IsForDefinition set to true in order
33133334
// to get GlobalValue with exactly the type we need, not something that
33143335
// might had been created for another decl with the same mangled name but
@@ -3644,6 +3665,10 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) {
36443665
// Defer until all versions have been semantically checked.
36453666
if (FD->hasAttr<TargetVersionAttr>() && !FD->isMultiVersion())
36463667
return false;
3668+
// Defer emission of SYCL kernel entry point functions during device
3669+
// compilation.
3670+
if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelEntryPointAttr>())
3671+
return false;
36473672
}
36483673
if (const auto *VD = dyn_cast<VarDecl>(Global)) {
36493674
if (Context.getInlineVariableDefinitionKind(VD) ==

clang/lib/CodeGen/CodeGenModule.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1972,6 +1972,11 @@ class CodeGenModule : public CodeGenTypeCache {
19721972
/// .gcda files in a way that persists in .bc files.
19731973
void EmitCoverageFile();
19741974

1975+
/// Given a sycl_kernel_entry_point attributed function, emit the
1976+
/// corresponding SYCL kernel caller offload entry point function.
1977+
void EmitSYCLKernelCaller(const FunctionDecl *KernelEntryPointFn,
1978+
ASTContext &Ctx);
1979+
19751980
/// Determine whether the definition must be emitted; if this returns \c
19761981
/// false, the definition can be emitted lazily if it's used.
19771982
bool MustBeEmitted(const ValueDecl *D);

clang/lib/CodeGen/CodeGenSYCL.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
//===--------- CodeGenSYCL.cpp - Code for SYCL kernel generation ----------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This contains code required for generation of SYCL kernel caller offload
10+
// entry point functions.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "CodeGenFunction.h"
15+
#include "CodeGenModule.h"
16+
17+
using namespace clang;
18+
using namespace CodeGen;
19+
20+
/// Apply the LLVM function attributes specific to SYCL kernel caller
/// functions to Fn: Fn is always marked as not recursing, and the
/// MustProgress attribute is added when CGF.checkIfFunctionMustProgress()
/// returns true.
static void SetSYCLKernelAttributes(llvm::Function *Fn, CodeGenFunction &CGF) {
21+
// SYCL 2020 device language restrictions require forward progress and
22+
// disallow recursion.
23+
Fn->setDoesNotRecurse();
24+
// MustProgress is conditional on the CodeGenFunction's own check rather
// than being added unconditionally.
if (CGF.checkIfFunctionMustProgress())
25+
Fn->addFnAttr(llvm::Attribute::MustProgress);
26+
}
27+
28+
/// Emit the SYCL kernel caller offload entry point function that corresponds
/// to KernelEntryPointFn, a function carrying a valid sycl_kernel_entry_point
/// attribute. The emitted function returns void, takes the parameters of the
/// OutlinedFunctionDecl referenced by the entry point's SYCLKernelCallStmt
/// body, is named with the kernel name recorded in the matching
/// SYCLKernelInfo, and is created with external linkage. Asserts that Ctx is
/// configured for SYCL device compilation.
void CodeGenModule::EmitSYCLKernelCaller(const FunctionDecl *KernelEntryPointFn,
29+
ASTContext &Ctx) {
30+
assert(Ctx.getLangOpts().SYCLIsDevice &&
31+
"SYCL kernel caller offload entry point functions can only be emitted"
32+
" during device compilation");
33+
34+
const auto *KernelEntryPointAttr =
35+
KernelEntryPointFn->getAttr<SYCLKernelEntryPointAttr>();
36+
assert(KernelEntryPointAttr && "Missing sycl_kernel_entry_point attribute");
37+
assert(!KernelEntryPointAttr->isInvalidAttr() &&
38+
"sycl_kernel_entry_point attribute is invalid");
39+
40+
// Find the SYCLKernelCallStmt.
41+
// cast<> is used, so the entry point's body is expected to be a
// SYCLKernelCallStmt.
SYCLKernelCallStmt *KernelCallStmt =
42+
cast<SYCLKernelCallStmt>(KernelEntryPointFn->getBody());
43+
44+
// Retrieve the SYCL kernel caller parameters from the OutlinedFunctionDecl.
45+
FunctionArgList Args;
46+
const OutlinedFunctionDecl *OutlinedFnDecl =
47+
KernelCallStmt->getOutlinedFunctionDecl();
48+
Args.append(OutlinedFnDecl->param_begin(), OutlinedFnDecl->param_end());
49+
50+
// Compute the function info and LLVM function type.
51+
const CGFunctionInfo &FnInfo =
52+
getTypes().arrangeSYCLKernelCallerDeclaration(Ctx.VoidTy, Args);
53+
llvm::FunctionType *FnTy = getTypes().GetFunctionType(FnInfo);
54+
55+
// Retrieve the generated name for the SYCL kernel caller function.
56+
// The SYCLKernelInfo is looked up by the canonical form of the kernel name
// type given by the attribute.
CanQualType KernelNameType =
57+
Ctx.getCanonicalType(KernelEntryPointAttr->getKernelName());
58+
const SYCLKernelInfo &KernelInfo = Ctx.getSYCLKernelInfo(KernelNameType);
59+
auto *Fn = llvm::Function::Create(FnTy, llvm::Function::ExternalLinkage,
60+
KernelInfo.GetKernelName(), &getModule());
61+
62+
// Emit the SYCL kernel caller function.
63+
CodeGenFunction CGF(*this);
64+
// An empty GlobalDecl is passed here and to StartFunction.
// NOTE(review): presumably because the generated function has no
// declaration of its own -- confirm.
SetLLVMFunctionAttributes(GlobalDecl(), FnInfo, Fn, false);
65+
SetSYCLKernelAttributes(Fn, CGF);
66+
CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, FnInfo, Args,
67+
SourceLocation(), SourceLocation());
68+
// The emitted body comes from the OutlinedFunctionDecl, not from
// KernelEntryPointFn itself.
CGF.EmitFunctionBody(OutlinedFnDecl->getBody());
69+
setDSOLocal(Fn);
70+
// Definition-level attributes are derived from the OutlinedFunctionDecl.
SetLLVMFunctionAttributesForDefinition(cast<Decl>(OutlinedFnDecl), Fn);
71+
CGF.FinishFunction();
72+
}

clang/lib/CodeGen/CodeGenTypes.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,13 @@ class CodeGenTypes {
229229
const CGFunctionInfo &arrangeBuiltinFunctionCall(QualType resultType,
230230
const CallArgList &args);
231231

232+
/// A SYCL kernel caller function is an offload device entry point function
233+
/// with a target device dependent calling convention such as amdgpu_kernel,
234+
/// ptx_kernel, or spir_kernel.
235+
const CGFunctionInfo &
236+
arrangeSYCLKernelCallerDeclaration(QualType resultType,
237+
const FunctionArgList &args);
238+
232239
/// Objective-C methods are C functions with some implicit parameters.
233240
const CGFunctionInfo &arrangeObjCMethodDeclaration(const ObjCMethodDecl *MD);
234241
const CGFunctionInfo &arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD,

clang/lib/CodeGen/Targets/NVPTX.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
7777
return true;
7878
}
7979

80+
// Reports llvm::CallingConv::PTX_Kernel as the kernel calling convention for
// this target, overriding the base class implementation.
unsigned getOpenCLKernelCallingConv() const override {
81+
return llvm::CallingConv::PTX_Kernel;
82+
}
83+
8084
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
8185
// resulting MDNode to the nvvm.annotations MDNode.
8286
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,

0 commit comments

Comments
 (0)