Skip to content

Reland [NVPTX] Add support for maxclusterrank in launch_bounds (#66496) #67667

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1267,7 +1267,8 @@ def CUDAInvalidTarget : InheritableAttr {

def CUDALaunchBounds : InheritableAttr {
let Spellings = [GNU<"launch_bounds">, Declspec<"__launch_bounds__">];
let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>];
let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>,
ExprArgument<"MaxBlocks", 1>];
let LangOpts = [CUDA];
let Subjects = SubjectList<[ObjCMethod, FunctionLike]>;
// An AST node is created for this attribute, but is not used by other parts
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -11850,6 +11850,10 @@ def err_sycl_special_type_num_init_method : Error<
"types with 'sycl_special_class' attribute must have one and only one '__init' "
"method defined">;

def warn_cuda_maxclusterrank_sm_90 : Warning<
"maxclusterrank requires sm_90 or higher, CUDA arch provided: %0, ignoring "
"%1 attribute">, InGroup<IgnoredAttributes>;

def err_bit_int_bad_size : Error<"%select{signed|unsigned}0 _BitInt must "
"have a bit size of at least %select{2|1}0">;
def err_bit_int_max_size : Error<"%select{signed|unsigned}0 _BitInt of bit "
Expand Down
5 changes: 3 additions & 2 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -11053,12 +11053,13 @@ class Sema final {
/// Create an CUDALaunchBoundsAttr attribute.
CUDALaunchBoundsAttr *CreateLaunchBoundsAttr(const AttributeCommonInfo &CI,
Expr *MaxThreads,
Expr *MinBlocks);
Expr *MinBlocks,
Expr *MaxBlocks);

/// AddLaunchBoundsAttr - Adds a launch_bounds attribute to a particular
/// declaration.
void AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI,
Expr *MaxThreads, Expr *MinBlocks);
Expr *MaxThreads, Expr *MinBlocks, Expr *MaxBlocks);

/// AddModeAttr - Adds a mode attribute to a particular declaration.
void AddModeAttr(Decl *D, const AttributeCommonInfo &CI, IdentifierInfo *Name,
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Basic/Targets/NVPTX.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {

bool hasBitIntType() const override { return true; }
bool hasBFloat16Type() const override { return true; }

CudaArch getGPU() const { return GPU; }
};
} // namespace targets
} // namespace clang
Expand Down
12 changes: 10 additions & 2 deletions clang/lib/CodeGen/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,8 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(
NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
MaxThreads.getExtValue());

// min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
// not specified in __launch_bounds__ or if the user specified a 0 value,
// min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
// was not specified in __launch_bounds__ or if the user specified a 0 value,
// we don't have to add a PTX directive.
if (Attr->getMinBlocks()) {
llvm::APSInt MinBlocks(32);
Expand All @@ -307,6 +307,14 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(
NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
MinBlocks.getExtValue());
}
if (Attr->getMaxBlocks()) {
llvm::APSInt MaxBlocks(32);
MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
if (MaxBlocks > 0)
// Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
MaxBlocks.getExtValue());
}
}

std::unique_ptr<TargetCodeGenInfo>
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Parse/ParseOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3739,7 +3739,8 @@ OMPClause *Parser::ParseOpenMPOMPXAttributesClause(bool ParseOnly) {
continue;
if (auto *A = Actions.CreateLaunchBoundsAttr(
PA, PA.getArgAsExpr(0),
PA.getNumArgs() > 1 ? PA.getArgAsExpr(1) : nullptr))
PA.getNumArgs() > 1 ? PA.getArgAsExpr(1) : nullptr,
PA.getNumArgs() > 2 ? PA.getArgAsExpr(2) : nullptr))
Attrs.push_back(A);
continue;
default:
Expand Down
43 changes: 34 additions & 9 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5608,6 +5608,14 @@ bool Sema::CheckRegparmAttr(const ParsedAttr &AL, unsigned &numParams) {
return false;
}

// Helper to get CudaArch.
static CudaArch getCudaArch(const TargetInfo &TI) {
if (!TI.getTriple().isNVPTX())
llvm_unreachable("getCudaArch is only valid for NVPTX triple");
auto &TO = TI.getTargetOpts();
return StringToCudaArch(TO.CPU);
}

// Checks whether an argument of launch_bounds attribute is
// acceptable, performs implicit conversion to Rvalue, and returns
// non-nullptr Expr result on success. Otherwise, it returns nullptr
Expand Down Expand Up @@ -5651,34 +5659,51 @@ static Expr *makeLaunchBoundsArgExpr(Sema &S, Expr *E,

CUDALaunchBoundsAttr *
Sema::CreateLaunchBoundsAttr(const AttributeCommonInfo &CI, Expr *MaxThreads,
Expr *MinBlocks) {
CUDALaunchBoundsAttr TmpAttr(Context, CI, MaxThreads, MinBlocks);
Expr *MinBlocks, Expr *MaxBlocks) {
CUDALaunchBoundsAttr TmpAttr(Context, CI, MaxThreads, MinBlocks, MaxBlocks);
MaxThreads = makeLaunchBoundsArgExpr(*this, MaxThreads, TmpAttr, 0);
if (MaxThreads == nullptr)
if (!MaxThreads)
return nullptr;

if (MinBlocks) {
MinBlocks = makeLaunchBoundsArgExpr(*this, MinBlocks, TmpAttr, 1);
if (MinBlocks == nullptr)
if (!MinBlocks)
return nullptr;
}

if (MaxBlocks) {
// '.maxclusterrank' ptx directive requires .target sm_90 or higher.
auto SM = getCudaArch(Context.getTargetInfo());
if (SM == CudaArch::UNKNOWN || SM < CudaArch::SM_90) {
Diag(MaxBlocks->getBeginLoc(), diag::warn_cuda_maxclusterrank_sm_90)
<< CudaArchToString(SM) << CI << MaxBlocks->getSourceRange();
// Ignore it by setting MaxBlocks to null;
MaxBlocks = nullptr;
} else {
MaxBlocks = makeLaunchBoundsArgExpr(*this, MaxBlocks, TmpAttr, 2);
if (!MaxBlocks)
return nullptr;
}
}

return ::new (Context)
CUDALaunchBoundsAttr(Context, CI, MaxThreads, MinBlocks);
CUDALaunchBoundsAttr(Context, CI, MaxThreads, MinBlocks, MaxBlocks);
}

void Sema::AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI,
Expr *MaxThreads, Expr *MinBlocks) {
if (auto *Attr = CreateLaunchBoundsAttr(CI, MaxThreads, MinBlocks))
Expr *MaxThreads, Expr *MinBlocks,
Expr *MaxBlocks) {
if (auto *Attr = CreateLaunchBoundsAttr(CI, MaxThreads, MinBlocks, MaxBlocks))
D->addAttr(Attr);
}

static void handleLaunchBoundsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
if (!AL.checkAtLeastNumArgs(S, 1) || !AL.checkAtMostNumArgs(S, 2))
if (!AL.checkAtLeastNumArgs(S, 1) || !AL.checkAtMostNumArgs(S, 3))
return;

S.AddLaunchBoundsAttr(D, AL, AL.getArgAsExpr(0),
AL.getNumArgs() > 1 ? AL.getArgAsExpr(1) : nullptr);
AL.getNumArgs() > 1 ? AL.getArgAsExpr(1) : nullptr,
AL.getNumArgs() > 2 ? AL.getArgAsExpr(2) : nullptr);
}

static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D,
Expand Down
10 changes: 9 additions & 1 deletion clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,15 @@ static void instantiateDependentCUDALaunchBoundsAttr(
MinBlocks = Result.getAs<Expr>();
}

S.AddLaunchBoundsAttr(New, Attr, MaxThreads, MinBlocks);
Expr *MaxBlocks = nullptr;
if (Attr.getMaxBlocks()) {
Result = S.SubstExpr(Attr.getMaxBlocks(), TemplateArgs);
if (Result.isInvalid())
return;
MaxBlocks = Result.getAs<Expr>();
}

S.AddLaunchBoundsAttr(New, Attr, MaxThreads, MinBlocks, MaxBlocks);
}

static void
Expand Down
69 changes: 69 additions & 0 deletions clang/test/CodeGenCUDA/launch-bounds.cu
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -target-cpu sm_90 -DUSE_MAX_BLOCKS -fcuda-is-device -emit-llvm -o - | FileCheck -check-prefix=CHECK_MAX_BLOCKS %s

#include "Inputs/cuda.h"

#define MAX_THREADS_PER_BLOCK 256
#define MIN_BLOCKS_PER_MP 2
#ifdef USE_MAX_BLOCKS
#define MAX_BLOCKS_PER_MP 4
#endif

// Test both max threads per block and Min cta per sm.
extern "C" {
Expand All @@ -17,6 +21,21 @@ Kernel1()
// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"maxntidx", i32 256}
// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"minctasm", i32 2}

#ifdef USE_MAX_BLOCKS
// Test max threads per block and min/max cta per sm.
extern "C" {
__global__ void
__launch_bounds__( MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP )
Kernel1_sm_90()
{
}
}

// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxntidx", i32 256}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"minctasm", i32 2}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxclusterrank", i32 4}
#endif // USE_MAX_BLOCKS

// Test only max threads per block. Min cta per sm defaults to 0, and
// CodeGen doesn't output a zero value for minctasm.
extern "C" {
Expand Down Expand Up @@ -50,6 +69,20 @@ template __global__ void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}

#ifdef USE_MAX_BLOCKS
template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
__global__ void
__launch_bounds__(max_threads_per_block, min_blocks_per_mp, max_blocks_per_mp)
Kernel4_sm_90()
{
}
template __global__ void Kernel4_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();

// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxntidx", i32 256}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"minctasm", i32 2}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxclusterrank", i32 4}
#endif //USE_MAX_BLOCKS

const int constint = 100;
template <int max_threads_per_block, int min_blocks_per_mp>
__global__ void
Expand All @@ -63,6 +96,23 @@ template __global__ void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258}

#ifdef USE_MAX_BLOCKS

template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
__global__ void
__launch_bounds__(max_threads_per_block + constint,
min_blocks_per_mp + max_threads_per_block,
max_blocks_per_mp + max_threads_per_block)
Kernel5_sm_90()
{
}
template __global__ void Kernel5_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();

// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxntidx", i32 356}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"minctasm", i32 258}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxclusterrank", i32 260}
#endif //USE_MAX_BLOCKS

// Make sure we don't emit negative launch bounds values.
__global__ void
__launch_bounds__( -MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP )
Expand All @@ -80,7 +130,26 @@ Kernel7()
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"maxntidx",
// CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"minctasm",

#ifdef USE_MAX_BLOCKS
__global__ void
__launch_bounds__( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP, -MAX_BLOCKS_PER_MP )
Kernel7_sm_90()
{
}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"maxntidx",
// CHECK_MAX_BLOCKS-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"minctasm",
// CHECK_MAX_BLOCKS-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"maxclusterrank",
#endif // USE_MAX_BLOCKS

const char constchar = 12;
__global__ void __launch_bounds__(constint, constchar) Kernel8() {}
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"minctasm", i32 12

#ifdef USE_MAX_BLOCKS
const char constchar_2 = 14;
__global__ void __launch_bounds__(constint, constchar, constchar_2) Kernel8_sm_90() {}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxntidx", i32 100
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"minctasm", i32 12
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxclusterrank", i32 14
#endif // USE_MAX_BLOCKS
7 changes: 5 additions & 2 deletions clang/test/SemaCUDA/launch_bounds.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -triple nvptx-unknown-unknown -target-cpu sm_75 -verify %s

#include "Inputs/cuda.h"

Expand All @@ -11,8 +11,9 @@ __launch_bounds__(0x10000000000000000) void TestWayTooBigArg(void); // expected-

__launch_bounds__(-128, 7) void TestNegArg1(void); // expected-warning {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
__launch_bounds__(128, -7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
__launch_bounds__(128, 2, -8) void TestNegArg2(void); // expected-warning {{maxclusterrank requires sm_90 or higher, CUDA arch provided: sm_75, ignoring 'launch_bounds' attribute}}

__launch_bounds__(1, 2, 3) void Test3Args(void); // expected-error {{'launch_bounds' attribute takes no more than 2 arguments}}
__launch_bounds__(1, 2, 3, 4) void Test4Args(void); // expected-error {{'launch_bounds' attribute takes no more than 3 arguments}}
__launch_bounds__() void TestNoArgs(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}}

int TestNoFunction __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to Objective-C methods, functions, and function pointers}}
Expand Down Expand Up @@ -47,3 +48,5 @@ __launch_bounds__(Args) void TestTemplateVariadicArgs(void) {} // expected-error

template <int... Args>
__launch_bounds__(1, Args) void TestTemplateVariadicArgs2(void) {} // expected-error {{expression contains unexpanded parameter pack 'Args'}}

__launch_bounds__(1, 2, 3) void Test3Args(void); // expected-warning {{maxclusterrank requires sm_90 or higher, CUDA arch provided: sm_75, ignoring 'launch_bounds' attribute}}
57 changes: 57 additions & 0 deletions clang/test/SemaCUDA/launch_bounds_sm_90.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -triple nvptx-unknown-unknown -target-cpu sm_90 -verify %s

#include "Inputs/cuda.h"

__launch_bounds__(128, 7) void Test2Args(void);
__launch_bounds__(128) void Test1Arg(void);

__launch_bounds__(0xffffffff) void TestMaxArg(void);
__launch_bounds__(0x100000000) void TestTooBigArg(void); // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
__launch_bounds__(0x10000000000000000) void TestWayTooBigArg(void); // expected-error {{integer literal is too large to be represented in any integer type}}
__launch_bounds__(1, 1, 0x10000000000000000) void TestWayTooBigArg(void); // expected-error {{integer literal is too large to be represented in any integer type}}

__launch_bounds__(-128, 7) void TestNegArg1(void); // expected-warning {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
__launch_bounds__(128, -7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
__launch_bounds__(-128, 1, 7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
__launch_bounds__(128, -1, 7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
__launch_bounds__(128, 1, -7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 2 is negative and will be ignored}}
// expected-warning@20 {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
// expected-warning@20 {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
__launch_bounds__(-128, -1, 7) void TestNegArg2(void);
// expected-warning@23 {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
// expected-warning@23 {{'launch_bounds' attribute parameter 2 is negative and will be ignored}}
__launch_bounds__(-128, 1, -7) void TestNegArg2(void);
// expected-warning@27 {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
// expected-warning@27 {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
// expected-warning@27 {{'launch_bounds' attribute parameter 2 is negative and will be ignored}}
__launch_bounds__(-128, -1, -7) void TestNegArg2(void);


__launch_bounds__(1, 2, 3, 4) void Test4Args(void); // expected-error {{'launch_bounds' attribute takes no more than 3 arguments}}
__launch_bounds__() void TestNoArgs(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}}

int TestNoFunction __launch_bounds__(128, 7, 13); // expected-warning {{'launch_bounds' attribute only applies to Objective-C methods, functions, and function pointers}}

__launch_bounds__(true) void TestBool(void);
__launch_bounds__(128, 1, 128.0) void TestFP(void); // expected-error {{'launch_bounds' attribute requires parameter 2 to be an integer constant}}
__launch_bounds__(128, 1, (void*)0) void TestNullptr(void); // expected-error {{'launch_bounds' attribute requires parameter 2 to be an integer constant}}

int nonconstint = 256;
__launch_bounds__(125, 1, nonconstint) void TestNonConstInt(void); // expected-error {{'launch_bounds' attribute requires parameter 2 to be an integer constant}}

const int constint = 512;
__launch_bounds__(128, 1, constint) void TestConstInt(void);
__launch_bounds__(128, 1, constint * 2 + 3) void TestConstIntExpr(void);

template <int a, int b, int c> __launch_bounds__(a, b, c) void TestTemplate2Args(void) {}
template void TestTemplate2Args<128,7, 13>(void);

template <int a, int b, int c>
__launch_bounds__(a + b, c + constint, a + b + c + constint) void TestTemplateExpr(void) {}
template void TestTemplateExpr<128+constint, 3, 7>(void);

template <int... Args>
__launch_bounds__(Args) void TestTemplateVariadicArgs(void) {} // expected-error {{expression contains unexpanded parameter pack 'Args'}}

template <int... Args>
__launch_bounds__(1, 22, Args) void TestTemplateVariadicArgs2(void) {} // expected-error {{expression contains unexpanded parameter pack 'Args'}}
Loading