Skip to content

[CUDA] Add support for __grid_constant__ attribute #114589

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,7 @@ CUDA Support
^^^^^^^^^^^^
- Clang now supports CUDA SDK up to 12.6
- Added support for sm_100
- Added support for `__grid_constant__` attribute.

AIX Support
^^^^^^^^^^^
Expand Down
7 changes: 7 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1450,6 +1450,13 @@ def CUDAHost : InheritableAttr {
}
def : MutualExclusions<[CUDAGlobal, CUDAHost]>;

def CUDAGridConstant : InheritableAttr {
let Spellings = [GNU<"grid_constant">, Declspec<"__grid_constant__">];
let Subjects = SubjectList<[ParmVar]>;
let LangOpts = [CUDA];
let Documentation = [CUDAGridConstantAttrDocs];
}

def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
let Spellings = [Clang<"nvptx_kernel">];
let Subjects = SubjectList<[Function]>;
Expand Down
10 changes: 10 additions & 0 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -6620,6 +6620,16 @@ unbind runtime APIs.
}];
}

def CUDAGridConstantAttrDocs : Documentation {
let Category = DocCatDecl;
let Content = [{
The ``__grid_constant__`` attribute can be applied to a ``const``-qualified kernel
function argument and allows compiler to take the address of that argument without
making a copy. The argument applies to sm_70 or newer GPUs, during compilation
with CUDA-11.7(PTX 7.7) or newer, and is ignored otherwise.
}];
}

def HIPManagedAttrDocs : Documentation {
let Category = DocCatDecl;
let Content = [{
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -9100,6 +9100,8 @@ def err_cuda_host_shared : Error<
"%select{__device__|__global__|__host__|__host__ __device__}0 functions">;
def err_cuda_nonstatic_constdev: Error<"__constant__, __device__, and "
"__managed__ are not allowed on non-static local variables">;
def err_cuda_grid_constant_not_allowed : Error<
"__grid_constant__ is only allowed on const-qualified kernel parameters">;
def err_cuda_ovl_target : Error<
"%select{__device__|__global__|__host__|__host__ __device__}0 function %1 "
"cannot overload %select{__device__|__global__|__host__|__host__ __device__}2 function %3">;
Expand Down
36 changes: 29 additions & 7 deletions clang/lib/CodeGen/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

using namespace clang;
Expand Down Expand Up @@ -78,7 +79,13 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
// resulting MDNode to the nvvm.annotations MDNode.
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
int Operand);
int Operand,
const SmallVectorImpl<int> &GridConstantArgs);

static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
int Operand) {
addNVVMMetadata(GV, Name, Operand, SmallVector<int, 1>(0));
}

private:
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
Expand Down Expand Up @@ -240,7 +247,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
}

const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
if (!FD)
return;

llvm::Function *F = cast<llvm::Function>(GV);

Expand All @@ -263,8 +271,13 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// __global__ functions cannot be called from the device, we do not
// need to set the noinline attribute.
if (FD->hasAttr<CUDAGlobalAttr>()) {
SmallVector<int, 10> GCI;
for (auto IV : llvm::enumerate(FD->parameters()))
if (IV.value()->hasAttr<CUDAGridConstantAttr>())
// For some reason arg indices are 1-based in NVVM
GCI.push_back(IV.index() + 1);
// Create !{<func-ref>, metadata !"kernel", i32 1} node
addNVVMMetadata(F, "kernel", 1);
addNVVMMetadata(F, "kernel", 1, GCI);
}
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
M.handleCUDALaunchBoundsAttr(F, Attr);
Expand All @@ -276,18 +289,27 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
}
}

void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
StringRef Name, int Operand) {
void NVPTXTargetCodeGenInfo::addNVVMMetadata(
llvm::GlobalValue *GV, StringRef Name, int Operand,
const SmallVectorImpl<int> &GridConstantArgs) {
llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();

// Get "nvvm.annotations" metadata node
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

llvm::Metadata *MDVals[] = {
SmallVector<llvm::Metadata *, 5> MDVals = {
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
if (!GridConstantArgs.empty()) {
SmallVector<llvm::Metadata *, 10> GCM;
for (int I : GridConstantArgs)
GCM.push_back(llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
llvm::MDNode::get(Ctx, GCM)});
}
// Append metadata to nvvm.annotations
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
Expand All @@ -309,7 +331,7 @@ NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
return llvm::ConstantExpr::getAddrSpaceCast(
llvm::ConstantPointerNull::get(NPT), PT);
}
}
} // namespace

void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
const CUDALaunchBoundsAttr *Attr,
Expand Down
11 changes: 10 additions & 1 deletion clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12222,8 +12222,17 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
<< NewFD;
}

if (!Redeclaration && LangOpts.CUDA)
if (!Redeclaration && LangOpts.CUDA) {
bool IsKernel = NewFD->hasAttr<CUDAGlobalAttr>();
for (auto *Parm : NewFD->parameters()) {
if (!Parm->getType()->isDependentType() &&
Parm->hasAttr<CUDAGridConstantAttr>() &&
!(IsKernel && Parm->getType().isConstQualified()))
Diag(Parm->getAttr<CUDAGridConstantAttr>()->getLocation(),
diag::err_cuda_grid_constant_not_allowed);
}
CUDA().checkTargetOverload(NewFD, Previous);
}
}

// Check if the function definition uses any AArch64 SME features without
Expand Down
12 changes: 12 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4748,6 +4748,15 @@ static void handleManagedAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
D->addAttr(CUDADeviceAttr::CreateImplicit(S.Context));
}

static void handleGridConstantAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
if (D->isInvalidDecl())
return;
// Whether __grid_constant__ is allowed to be used will be checked in
// Sema::CheckFunctionDeclaration as we need complete function decl to make
// the call.
D->addAttr(::new (S.Context) CUDAGridConstantAttr(S.Context, AL));
}

static void handleGNUInlineAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
const auto *Fn = cast<FunctionDecl>(D);
if (!Fn->isInlineSpecified()) {
Expand Down Expand Up @@ -6642,6 +6651,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_CUDADevice:
handleDeviceAttr(S, D, AL);
break;
case ParsedAttr::AT_CUDAGridConstant:
handleGridConstantAttr(S, D, AL);
break;
case ParsedAttr::AT_HIPManaged:
handleManagedAttr(S, D, AL);
break;
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
continue;
}

if (auto *A = dyn_cast<CUDAGridConstantAttr>(TmplAttr)) {
if (!New->hasAttr<CUDAGridConstantAttr>())
New->addAttr(A->clone(Context));
continue;
}

assert(!TmplAttr->isPackExpansion());
if (TmplAttr->isLateParsed() && LateAttrs) {
// Late parsed attributes must be instantiated and attached after the
Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGenCUDA/Inputs/cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#define __managed__ __attribute__((managed))
#endif
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
#define __grid_constant__ __attribute__((grid_constant))
#else
#define __constant__
#define __device__
Expand All @@ -20,6 +21,7 @@
#define __shared__
#define __managed__
#define __launch_bounds__(...)
#define __grid_constant__
#endif

struct dim3 {
Expand Down
31 changes: 31 additions & 0 deletions clang/test/CodeGenCUDA/grid-constant.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5
// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s

#include "Inputs/cuda.h"

struct S {};

__global__ void kernel(__grid_constant__ const S gc_arg1, int arg2, __grid_constant__ const int gc_arg3) {}

// dependent arguments get diagnosed after instantiation.
template <typename T>
__global__ void tkernel_const(__grid_constant__ const T arg) {}

template <typename T>
__global__ void tkernel(int dummy, __grid_constant__ T arg) {}

void foo() {
tkernel_const<const S><<<1,1>>>({});
tkernel_const<S><<<1,1>>>({});
tkernel<const S><<<1,1>>>(1, {});
}
//.
//.
// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]}
// CHECK: [[META1]] = !{i32 1, i32 3}
// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]}
// CHECK: [[META3]] = !{i32 1}
// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]}
// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]}
// CHECK: [[META6]] = !{i32 2}
//.
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
// CHECK-NEXT: CUDADeviceBuiltinSurfaceType (SubjectMatchRule_record)
// CHECK-NEXT: CUDADeviceBuiltinTextureType (SubjectMatchRule_record)
// CHECK-NEXT: CUDAGlobal (SubjectMatchRule_function)
// CHECK-NEXT: CUDAGridConstant (SubjectMatchRule_variable_is_parameter)
// CHECK-NEXT: CUDAHost (SubjectMatchRule_function)
// CHECK-NEXT: CUDALaunchBounds (SubjectMatchRule_objc_method, SubjectMatchRule_hasType_functionType)
// CHECK-NEXT: CUDAShared (SubjectMatchRule_variable)
Expand Down
1 change: 1 addition & 0 deletions clang/test/SemaCUDA/Inputs/cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#define __host__ __attribute__((host))
#define __shared__ __attribute__((shared))
#define __managed__ __attribute__((managed))
#define __grid_constant__ __attribute__((grid_constant))
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))

struct dim3 {
Expand Down
33 changes: 33 additions & 0 deletions clang/test/SemaCUDA/grid-constant.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// RUN: %clang_cc1 -fsyntax-only -verify %s
// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
#include "Inputs/cuda.h"

struct S {};

__global__ void kernel_struct(__grid_constant__ const S arg) {}
__global__ void kernel_scalar(__grid_constant__ const int arg) {}

__global__ void gc_kernel_non_const(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}

void non_kernel(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}

// templates w/ non-dependent argument types get diagnosed right
// away, without instantiation.
template <typename T>
__global__ void tkernel_nd_const(__grid_constant__ const S arg, T dummy) {}
template <typename T>
__global__ void tkernel_nd_non_const(__grid_constant__ S arg, T dummy) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}

// dependent arguments get diagnosed after instantiation.
template <typename T>
__global__ void tkernel_const(__grid_constant__ const T arg) {}

template <typename T>
__global__ void tkernel(__grid_constant__ T arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}

void foo() {
tkernel_const<const S><<<1,1>>>({});
tkernel_const<S><<<1,1>>>({});
tkernel<const S><<<1,1>>>({});
tkernel<S><<<1,1>>>({}); // expected-note {{in instantiation of function template specialization 'tkernel<S>' requested here}}
}
Loading