Skip to content

Commit 7c3fdcc

Browse files
authored
[CUDA] Add support for __grid_constant__ attribute (llvm#114589)
LLVM support for the attribute has been implemented already, so it just plumbs it through to the CUDA front-end. One notable difference from NVCC is that the attribute can be used regardless of the targeted GPU. On the older GPUs it will just be ignored. The attribute is a performance hint, and does not warrant a hard error if compiler can't benefit from it on a particular GPU variant.
1 parent 592c0fe commit 7c3fdcc

File tree

13 files changed

+145
-8
lines changed

13 files changed

+145
-8
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,7 @@ CUDA Support
716716
^^^^^^^^^^^^
717717
- Clang now supports CUDA SDK up to 12.6
718718
- Added support for sm_100
719+
- Added support for `__grid_constant__` attribute.
719720

720721
AIX Support
721722
^^^^^^^^^^^

clang/include/clang/Basic/Attr.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1451,6 +1451,13 @@ def CUDAHost : InheritableAttr {
14511451
}
14521452
def : MutualExclusions<[CUDAGlobal, CUDAHost]>;
14531453

1454+
def CUDAGridConstant : InheritableAttr {
1455+
let Spellings = [GNU<"grid_constant">, Declspec<"__grid_constant__">];
1456+
let Subjects = SubjectList<[ParmVar]>;
1457+
let LangOpts = [CUDA];
1458+
let Documentation = [CUDAGridConstantAttrDocs];
1459+
}
1460+
14541461
def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
14551462
let Spellings = [Clang<"nvptx_kernel">];
14561463
let Subjects = SubjectList<[Function]>;

clang/include/clang/Basic/AttrDocs.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6794,6 +6794,16 @@ unbind runtime APIs.
67946794
}];
67956795
}
67966796

6797+
def CUDAGridConstantAttrDocs : Documentation {
6798+
let Category = DocCatDecl;
6799+
let Content = [{
6800+
The ``__grid_constant__`` attribute can be applied to a ``const``-qualified kernel
6801+
function argument and allows compiler to take the address of that argument without
6802+
making a copy. The argument applies to sm_70 or newer GPUs, during compilation
6803+
with CUDA-11.7(PTX 7.7) or newer, and is ignored otherwise.
6804+
}];
6805+
}
6806+
67976807
def HIPManagedAttrDocs : Documentation {
67986808
let Category = DocCatDecl;
67996809
let Content = [{

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9100,6 +9100,8 @@ def err_cuda_host_shared : Error<
91009100
"%select{__device__|__global__|__host__|__host__ __device__}0 functions">;
91019101
def err_cuda_nonstatic_constdev: Error<"__constant__, __device__, and "
91029102
"__managed__ are not allowed on non-static local variables">;
9103+
def err_cuda_grid_constant_not_allowed : Error<
9104+
"__grid_constant__ is only allowed on const-qualified kernel parameters">;
91039105
def err_cuda_ovl_target : Error<
91049106
"%select{__device__|__global__|__host__|__host__ __device__}0 function %1 "
91059107
"cannot overload %select{__device__|__global__|__host__|__host__ __device__}2 function %3">;

clang/lib/CodeGen/Targets/NVPTX.cpp

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "ABIInfoImpl.h"
1010
#include "TargetInfo.h"
11+
#include "llvm/ADT/STLExtras.h"
1112
#include "llvm/IR/IntrinsicsNVPTX.h"
1213

1314
using namespace clang;
@@ -78,7 +79,13 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
7879
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
7980
// resulting MDNode to the nvvm.annotations MDNode.
8081
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
81-
int Operand);
82+
int Operand,
83+
const SmallVectorImpl<int> &GridConstantArgs);
84+
85+
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
86+
int Operand) {
87+
addNVVMMetadata(GV, Name, Operand, SmallVector<int, 1>(0));
88+
}
8289

8390
private:
8491
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
@@ -240,7 +247,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
240247
}
241248

242249
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
243-
if (!FD) return;
250+
if (!FD)
251+
return;
244252

245253
llvm::Function *F = cast<llvm::Function>(GV);
246254

@@ -263,8 +271,13 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
263271
// __global__ functions cannot be called from the device, we do not
264272
// need to set the noinline attribute.
265273
if (FD->hasAttr<CUDAGlobalAttr>()) {
274+
SmallVector<int, 10> GCI;
275+
for (auto IV : llvm::enumerate(FD->parameters()))
276+
if (IV.value()->hasAttr<CUDAGridConstantAttr>())
277+
// For some reason arg indices are 1-based in NVVM
278+
GCI.push_back(IV.index() + 1);
266279
// Create !{<func-ref>, metadata !"kernel", i32 1} node
267-
addNVVMMetadata(F, "kernel", 1);
280+
addNVVMMetadata(F, "kernel", 1, GCI);
268281
}
269282
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
270283
M.handleCUDALaunchBoundsAttr(F, Attr);
@@ -276,18 +289,27 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
276289
}
277290
}
278291

279-
void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
280-
StringRef Name, int Operand) {
292+
void NVPTXTargetCodeGenInfo::addNVVMMetadata(
293+
llvm::GlobalValue *GV, StringRef Name, int Operand,
294+
const SmallVectorImpl<int> &GridConstantArgs) {
281295
llvm::Module *M = GV->getParent();
282296
llvm::LLVMContext &Ctx = M->getContext();
283297

284298
// Get "nvvm.annotations" metadata node
285299
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
286300

287-
llvm::Metadata *MDVals[] = {
301+
SmallVector<llvm::Metadata *, 5> MDVals = {
288302
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
289303
llvm::ConstantAsMetadata::get(
290304
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
305+
if (!GridConstantArgs.empty()) {
306+
SmallVector<llvm::Metadata *, 10> GCM;
307+
for (int I : GridConstantArgs)
308+
GCM.push_back(llvm::ConstantAsMetadata::get(
309+
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
310+
MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
311+
llvm::MDNode::get(Ctx, GCM)});
312+
}
291313
// Append metadata to nvvm.annotations
292314
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
293315
}
@@ -309,7 +331,7 @@ NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
309331
return llvm::ConstantExpr::getAddrSpaceCast(
310332
llvm::ConstantPointerNull::get(NPT), PT);
311333
}
312-
}
334+
} // namespace
313335

314336
void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
315337
const CUDALaunchBoundsAttr *Attr,

clang/lib/Sema/SemaDecl.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12225,8 +12225,17 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
1222512225
<< NewFD;
1222612226
}
1222712227

12228-
if (!Redeclaration && LangOpts.CUDA)
12228+
if (!Redeclaration && LangOpts.CUDA) {
12229+
bool IsKernel = NewFD->hasAttr<CUDAGlobalAttr>();
12230+
for (auto *Parm : NewFD->parameters()) {
12231+
if (!Parm->getType()->isDependentType() &&
12232+
Parm->hasAttr<CUDAGridConstantAttr>() &&
12233+
!(IsKernel && Parm->getType().isConstQualified()))
12234+
Diag(Parm->getAttr<CUDAGridConstantAttr>()->getLocation(),
12235+
diag::err_cuda_grid_constant_not_allowed);
12236+
}
1222912237
CUDA().checkTargetOverload(NewFD, Previous);
12238+
}
1223012239
}
1223112240

1223212241
// Check if the function definition uses any AArch64 SME features without

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4748,6 +4748,15 @@ static void handleManagedAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
47484748
D->addAttr(CUDADeviceAttr::CreateImplicit(S.Context));
47494749
}
47504750

4751+
static void handleGridConstantAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
4752+
if (D->isInvalidDecl())
4753+
return;
4754+
// Whether __grid_constant__ is allowed to be used will be checked in
4755+
// Sema::CheckFunctionDeclaration as we need complete function decl to make
4756+
// the call.
4757+
D->addAttr(::new (S.Context) CUDAGridConstantAttr(S.Context, AL));
4758+
}
4759+
47514760
static void handleGNUInlineAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
47524761
const auto *Fn = cast<FunctionDecl>(D);
47534762
if (!Fn->isInlineSpecified()) {
@@ -6645,6 +6654,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
66456654
case ParsedAttr::AT_CUDADevice:
66466655
handleDeviceAttr(S, D, AL);
66476656
break;
6657+
case ParsedAttr::AT_CUDAGridConstant:
6658+
handleGridConstantAttr(S, D, AL);
6659+
break;
66486660
case ParsedAttr::AT_HIPManaged:
66496661
handleManagedAttr(S, D, AL);
66506662
break;

clang/lib/Sema/SemaTemplateInstantiateDecl.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -876,6 +876,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
876876
continue;
877877
}
878878

879+
if (auto *A = dyn_cast<CUDAGridConstantAttr>(TmplAttr)) {
880+
if (!New->hasAttr<CUDAGridConstantAttr>())
881+
New->addAttr(A->clone(Context));
882+
continue;
883+
}
884+
879885
assert(!TmplAttr->isPackExpansion());
880886
if (TmplAttr->isLateParsed() && LateAttrs) {
881887
// Late parsed attributes must be instantiated and attached after the

clang/test/CodeGenCUDA/Inputs/cuda.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#define __managed__ __attribute__((managed))
1313
#endif
1414
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
15+
#define __grid_constant__ __attribute__((grid_constant))
1516
#else
1617
#define __constant__
1718
#define __device__
@@ -20,6 +21,7 @@
2021
#define __shared__
2122
#define __managed__
2223
#define __launch_bounds__(...)
24+
#define __grid_constant__
2325
#endif
2426

2527
struct dim3 {
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5
2+
// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
3+
4+
#include "Inputs/cuda.h"
5+
6+
struct S {};
7+
8+
__global__ void kernel(__grid_constant__ const S gc_arg1, int arg2, __grid_constant__ const int gc_arg3) {}
9+
10+
// dependent arguments get diagnosed after instantiation.
11+
template <typename T>
12+
__global__ void tkernel_const(__grid_constant__ const T arg) {}
13+
14+
template <typename T>
15+
__global__ void tkernel(int dummy, __grid_constant__ T arg) {}
16+
17+
void foo() {
18+
tkernel_const<const S><<<1,1>>>({});
19+
tkernel_const<S><<<1,1>>>({});
20+
tkernel<const S><<<1,1>>>(1, {});
21+
}
22+
//.
23+
//.
24+
// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]}
25+
// CHECK: [[META1]] = !{i32 1, i32 3}
26+
// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]}
27+
// CHECK: [[META3]] = !{i32 1}
28+
// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]}
29+
// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]}
30+
// CHECK: [[META6]] = !{i32 2}
31+
//.

clang/test/Misc/pragma-attribute-supported-attributes-list.test

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
// CHECK-NEXT: CUDADeviceBuiltinSurfaceType (SubjectMatchRule_record)
4040
// CHECK-NEXT: CUDADeviceBuiltinTextureType (SubjectMatchRule_record)
4141
// CHECK-NEXT: CUDAGlobal (SubjectMatchRule_function)
42+
// CHECK-NEXT: CUDAGridConstant (SubjectMatchRule_variable_is_parameter)
4243
// CHECK-NEXT: CUDAHost (SubjectMatchRule_function)
4344
// CHECK-NEXT: CUDALaunchBounds (SubjectMatchRule_objc_method, SubjectMatchRule_hasType_functionType)
4445
// CHECK-NEXT: CUDAShared (SubjectMatchRule_variable)

clang/test/SemaCUDA/Inputs/cuda.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#define __host__ __attribute__((host))
1212
#define __shared__ __attribute__((shared))
1313
#define __managed__ __attribute__((managed))
14+
#define __grid_constant__ __attribute__((grid_constant))
1415
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
1516

1617
struct dim3 {

clang/test/SemaCUDA/grid-constant.cu

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// RUN: %clang_cc1 -fsyntax-only -verify %s
2+
// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
3+
#include "Inputs/cuda.h"
4+
5+
struct S {};
6+
7+
__global__ void kernel_struct(__grid_constant__ const S arg) {}
8+
__global__ void kernel_scalar(__grid_constant__ const int arg) {}
9+
10+
__global__ void gc_kernel_non_const(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
11+
12+
void non_kernel(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
13+
14+
// templates w/ non-dependent argument types get diagnosed right
15+
// away, without instantiation.
16+
template <typename T>
17+
__global__ void tkernel_nd_const(__grid_constant__ const S arg, T dummy) {}
18+
template <typename T>
19+
__global__ void tkernel_nd_non_const(__grid_constant__ S arg, T dummy) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
20+
21+
// dependent arguments get diagnosed after instantiation.
22+
template <typename T>
23+
__global__ void tkernel_const(__grid_constant__ const T arg) {}
24+
25+
template <typename T>
26+
__global__ void tkernel(__grid_constant__ T arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
27+
28+
void foo() {
29+
tkernel_const<const S><<<1,1>>>({});
30+
tkernel_const<S><<<1,1>>>({});
31+
tkernel<const S><<<1,1>>>({});
32+
tkernel<S><<<1,1>>>({}); // expected-note {{in instantiation of function template specialization 'tkernel<S>' requested here}}
33+
}

0 commit comments

Comments
 (0)