Skip to content

Commit 5aeee76

Browse files
committed
[CUDA] Add support for __grid_constant__ attribute
1 parent 795b4ef commit 5aeee76

File tree

12 files changed

+135
-8
lines changed

12 files changed

+135
-8
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,7 @@ CUDA Support
716716
^^^^^^^^^^^^
717717
- Clang now supports CUDA SDK up to 12.6
718718
- Added support for sm_100
719+
- Added support for `__grid_constant__` attribute.
719720

720721
AIX Support
721722
^^^^^^^^^^^

clang/include/clang/Basic/Attr.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1450,6 +1450,13 @@ def CUDAHost : InheritableAttr {
14501450
}
14511451
def : MutualExclusions<[CUDAGlobal, CUDAHost]>;
14521452

1453+
def CUDAGridConstant : InheritableAttr {
1454+
let Spellings = [GNU<"grid_constant">, Declspec<"__grid_constant__">];
1455+
let Subjects = SubjectList<[ParmVar]>;
1456+
let LangOpts = [CUDA];
1457+
let Documentation = [Undocumented];
1458+
}
1459+
14531460
def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
14541461
let Spellings = [Clang<"nvptx_kernel">];
14551462
let Subjects = SubjectList<[Function]>;

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9100,6 +9100,8 @@ def err_cuda_host_shared : Error<
91009100
"%select{__device__|__global__|__host__|__host__ __device__}0 functions">;
91019101
def err_cuda_nonstatic_constdev: Error<"__constant__, __device__, and "
91029102
"__managed__ are not allowed on non-static local variables">;
9103+
def err_cuda_grid_constant_not_allowed : Error<
9104+
"__grid_constant__ is only allowed on const-qualified kernel parameters">;
91039105
def err_cuda_ovl_target : Error<
91049106
"%select{__device__|__global__|__host__|__host__ __device__}0 function %1 "
91059107
"cannot overload %select{__device__|__global__|__host__|__host__ __device__}2 function %3">;

clang/lib/CodeGen/Targets/NVPTX.cpp

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "ABIInfoImpl.h"
1010
#include "TargetInfo.h"
11+
#include "llvm/ADT/STLExtras.h"
1112
#include "llvm/IR/IntrinsicsNVPTX.h"
1213

1314
using namespace clang;
@@ -78,7 +79,13 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
7879
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
7980
// resulting MDNode to the nvvm.annotations MDNode.
8081
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
81-
int Operand);
82+
int Operand,
83+
const SmallVectorImpl<int> &GridConstantArgs);
84+
85+
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
86+
int Operand) {
87+
addNVVMMetadata(GV, Name, Operand, SmallVector<int, 1>(0));
88+
}
8289

8390
private:
8491
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
@@ -240,7 +247,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
240247
}
241248

242249
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
243-
if (!FD) return;
250+
if (!FD)
251+
return;
244252

245253
llvm::Function *F = cast<llvm::Function>(GV);
246254

@@ -263,8 +271,13 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
263271
// __global__ functions cannot be called from the device, we do not
264272
// need to set the noinline attribute.
265273
if (FD->hasAttr<CUDAGlobalAttr>()) {
274+
SmallVector<int, 10> GCI;
275+
for (auto IV : llvm::enumerate(FD->parameters()))
276+
if (IV.value()->hasAttr<CUDAGridConstantAttr>())
277+
// For some reason arg indices are 1-based in NVVM
278+
GCI.push_back(IV.index() + 1);
266279
// Create !{<func-ref>, metadata !"kernel", i32 1} node
267-
addNVVMMetadata(F, "kernel", 1);
280+
addNVVMMetadata(F, "kernel", 1, GCI);
268281
}
269282
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
270283
M.handleCUDALaunchBoundsAttr(F, Attr);
@@ -276,18 +289,27 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
276289
}
277290
}
278291

279-
void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
280-
StringRef Name, int Operand) {
292+
void NVPTXTargetCodeGenInfo::addNVVMMetadata(
293+
llvm::GlobalValue *GV, StringRef Name, int Operand,
294+
const SmallVectorImpl<int> &GridConstantArgs) {
281295
llvm::Module *M = GV->getParent();
282296
llvm::LLVMContext &Ctx = M->getContext();
283297

284298
// Get "nvvm.annotations" metadata node
285299
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
286300

287-
llvm::Metadata *MDVals[] = {
301+
SmallVector<llvm::Metadata *, 5> MDVals = {
288302
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
289303
llvm::ConstantAsMetadata::get(
290304
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
305+
if (!GridConstantArgs.empty()) {
306+
SmallVector<llvm::Metadata *, 10> GCM;
307+
for (int I : GridConstantArgs)
308+
GCM.push_back(llvm::ConstantAsMetadata::get(
309+
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
310+
MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
311+
llvm::MDNode::get(Ctx, GCM)});
312+
}
291313
// Append metadata to nvvm.annotations
292314
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
293315
}
@@ -309,7 +331,7 @@ NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
309331
return llvm::ConstantExpr::getAddrSpaceCast(
310332
llvm::ConstantPointerNull::get(NPT), PT);
311333
}
312-
}
334+
} // namespace
313335

314336
void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
315337
const CUDALaunchBoundsAttr *Attr,

clang/lib/Sema/SemaDecl.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12222,8 +12222,17 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
1222212222
<< NewFD;
1222312223
}
1222412224

12225-
if (!Redeclaration && LangOpts.CUDA)
12225+
if (!Redeclaration && LangOpts.CUDA) {
12226+
bool IsKernel = NewFD->hasAttr<CUDAGlobalAttr>();
12227+
for (auto *Parm : NewFD->parameters()) {
12228+
if (!Parm->getType()->isDependentType() &&
12229+
Parm->hasAttr<CUDAGridConstantAttr>() &&
12230+
!(IsKernel && Parm->getType().isConstQualified()))
12231+
Diag(Parm->getAttr<CUDAGridConstantAttr>()->getLocation(),
12232+
diag::err_cuda_grid_constant_not_allowed);
12233+
}
1222612234
CUDA().checkTargetOverload(NewFD, Previous);
12235+
}
1222712236
}
1222812237

1222912238
// Check if the function definition uses any AArch64 SME features without

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4748,6 +4748,15 @@ static void handleManagedAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
47484748
D->addAttr(CUDADeviceAttr::CreateImplicit(S.Context));
47494749
}
47504750

4751+
static void handleGridConstantAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
4752+
if (D->isInvalidDecl())
4753+
return;
4754+
// Whether __grid_constant__ is allowed to be used will be checked in
4755+
// Sema::CheckFunctionDeclaration as we need complete function decl to make
4756+
// the call.
4757+
D->addAttr(::new (S.Context) CUDAGridConstantAttr(S.Context, AL));
4758+
}
4759+
47514760
static void handleGNUInlineAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
47524761
const auto *Fn = cast<FunctionDecl>(D);
47534762
if (!Fn->isInlineSpecified()) {
@@ -6642,6 +6651,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
66426651
case ParsedAttr::AT_CUDADevice:
66436652
handleDeviceAttr(S, D, AL);
66446653
break;
6654+
case ParsedAttr::AT_CUDAGridConstant:
6655+
handleGridConstantAttr(S, D, AL);
6656+
break;
66456657
case ParsedAttr::AT_HIPManaged:
66466658
handleManagedAttr(S, D, AL);
66476659
break;

clang/lib/Sema/SemaTemplateInstantiateDecl.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -876,6 +876,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
876876
continue;
877877
}
878878

879+
if (auto *A = dyn_cast<CUDAGridConstantAttr>(TmplAttr)) {
880+
if (!New->hasAttr<CUDAGridConstantAttr>())
881+
New->addAttr(A->clone(Context));
882+
continue;
883+
}
884+
879885
assert(!TmplAttr->isPackExpansion());
880886
if (TmplAttr->isLateParsed() && LateAttrs) {
881887
// Late parsed attributes must be instantiated and attached after the

clang/test/CodeGenCUDA/Inputs/cuda.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#define __managed__ __attribute__((managed))
1313
#endif
1414
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
15+
#define __grid_constant__ __attribute__((grid_constant))
1516
#else
1617
#define __constant__
1718
#define __device__
@@ -20,6 +21,7 @@
2021
#define __shared__
2122
#define __managed__
2223
#define __launch_bounds__(...)
24+
#define __grid_constant__
2325
#endif
2426

2527
struct dim3 {
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5
2+
// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
3+
4+
#include "Inputs/cuda.h"
5+
6+
struct S {};
7+
8+
__global__ void kernel(__grid_constant__ const S gc_arg1, int arg2, __grid_constant__ const int gc_arg3) {}
9+
10+
// dependent arguments get diagnosed after instantiation.
11+
template <typename T>
12+
__global__ void tkernel_const(__grid_constant__ const T arg) {}
13+
14+
template <typename T>
15+
__global__ void tkernel(int dummy, __grid_constant__ T arg) {}
16+
17+
void foo() {
18+
tkernel_const<const S><<<1,1>>>({});
19+
tkernel_const<S><<<1,1>>>({});
20+
tkernel<const S><<<1,1>>>(1, {});
21+
}
22+
//.
23+
//.
24+
// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]}
25+
// CHECK: [[META1]] = !{i32 1, i32 3}
26+
// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]}
27+
// CHECK: [[META3]] = !{i32 1}
28+
// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]}
29+
// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]}
30+
// CHECK: [[META6]] = !{i32 2}
31+
//.

clang/test/Misc/pragma-attribute-supported-attributes-list.test

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
// CHECK-NEXT: CUDADeviceBuiltinSurfaceType (SubjectMatchRule_record)
4040
// CHECK-NEXT: CUDADeviceBuiltinTextureType (SubjectMatchRule_record)
4141
// CHECK-NEXT: CUDAGlobal (SubjectMatchRule_function)
42+
// CHECK-NEXT: CUDAGridConstant (SubjectMatchRule_variable_is_parameter)
4243
// CHECK-NEXT: CUDAHost (SubjectMatchRule_function)
4344
// CHECK-NEXT: CUDALaunchBounds (SubjectMatchRule_objc_method, SubjectMatchRule_hasType_functionType)
4445
// CHECK-NEXT: CUDAShared (SubjectMatchRule_variable)

clang/test/SemaCUDA/Inputs/cuda.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#define __host__ __attribute__((host))
1212
#define __shared__ __attribute__((shared))
1313
#define __managed__ __attribute__((managed))
14+
#define __grid_constant__ __attribute__((grid_constant))
1415
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
1516

1617
struct dim3 {

clang/test/SemaCUDA/grid-constant.cu

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// RUN: %clang_cc1 -fsyntax-only -verify %s
2+
// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
3+
#include "Inputs/cuda.h"
4+
5+
struct S {};
6+
7+
__global__ void kernel_struct(__grid_constant__ const S arg) {}
8+
__global__ void kernel_scalar(__grid_constant__ const int arg) {}
9+
10+
__global__ void gc_kernel_non_const(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
11+
12+
void non_kernel(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
13+
14+
// templates w/ non-dependent argument types get diagnosed right
15+
// away, without instantiation.
16+
template <typename T>
17+
__global__ void tkernel_nd_const(__grid_constant__ const S arg, T dummy) {}
18+
template <typename T>
19+
__global__ void tkernel_nd_non_const(__grid_constant__ S arg, T dummy) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
20+
21+
// dependent arguments get diagnosed after instantiation.
22+
template <typename T>
23+
__global__ void tkernel_const(__grid_constant__ const T arg) {}
24+
25+
template <typename T>
26+
__global__ void tkernel(__grid_constant__ T arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
27+
28+
void foo() {
29+
tkernel_const<const S><<<1,1>>>({});
30+
tkernel_const<S><<<1,1>>>({});
31+
tkernel<const S><<<1,1>>>({});
32+
tkernel<S><<<1,1>>>({}); // expected-note {{in instantiation of function template specialization 'tkernel<S>' requested here}}
33+
}

0 commit comments

Comments
 (0)