Skip to content

Commit c4e517f

Browse files
jwanggit86Jun Wang
andauthored
[AMDGPU] Adding the amdgpu_max_num_work_groups function attribute (#79035)
A new function attribute named amdgpu_max_num_work_groups is added. This attribute, which takes up to three integers, lets programmers tell the compiler the maximum number of workgroups that will be launched in each of the three dimensions, so that it can optimize based on that information. --------- Co-authored-by: Jun Wang <[email protected]>
1 parent 93503aa commit c4e517f

21 files changed

+628
-1
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,12 @@ Removed Compiler Flags
194194

195195
Attribute Changes in Clang
196196
--------------------------
197+
- Introduced a new function attribute ``__attribute__((amdgpu_max_num_work_groups(x, y, z)))`` or
198+
``[[clang::amdgpu_max_num_work_groups(x, y, z)]]`` for the AMDGPU target. This attribute can be
199+
attached to HIP or OpenCL kernel function definitions to provide an optimization hint. The parameters
200+
``x``, ``y``, and ``z`` specify the maximum number of workgroups for the respective dimensions,
201+
and each must be a positive integer when provided. The parameter ``x`` is required, while ``y`` and
202+
``z`` are optional with default value of 1.
197203

198204
Improvements to Clang's diagnostics
199205
-----------------------------------

clang/include/clang/Basic/Attr.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2054,6 +2054,13 @@ def AMDGPUNumVGPR : InheritableAttr {
20542054
let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
20552055
}
20562056

2057+
// amdgpu_max_num_work_groups(x[, y[, z]]): optimization hint giving the
// maximum number of work groups per dimension for a kernel function.
def AMDGPUMaxNumWorkGroups : InheritableAttr {
2058+
let Spellings = [Clang<"amdgpu_max_num_work_groups", 0>];
2059+
// X is mandatory; Y and Z are optional and are null when omitted.
let Args = [ExprArgument<"MaxNumWorkGroupsX">, ExprArgument<"MaxNumWorkGroupsY", 1>, ExprArgument<"MaxNumWorkGroupsZ", 1>];
2060+
let Documentation = [AMDGPUMaxNumWorkGroupsDocs];
2061+
let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
2062+
}
2063+
20572064
def AMDGPUKernelCall : DeclOrTypeAttr {
20582065
let Spellings = [Clang<"amdgpu_kernel">];
20592066
let Documentation = [Undocumented];

clang/include/clang/Basic/AttrDocs.td

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2741,6 +2741,33 @@ An error will be given if:
27412741
}];
27422742
}
27432743

2744+
// User-facing documentation for the amdgpu_max_num_work_groups attribute;
// referenced from the Documentation list of the AMDGPUMaxNumWorkGroups record.
def AMDGPUMaxNumWorkGroupsDocs : Documentation {
2745+
let Category = DocCatAMDGPUAttributes;
2746+
let Content = [{
2747+
This attribute specifies the max number of work groups when the kernel
2748+
is dispatched.
2749+
2750+
Clang supports the
2751+
``__attribute__((amdgpu_max_num_work_groups(<x>, <y>, <z>)))`` or
2752+
``[[clang::amdgpu_max_num_work_groups(<x>, <y>, <z>)]]`` attribute for the
2753+
AMDGPU target. This attribute may be attached to HIP or OpenCL kernel function
2754+
definitions and is an optimization hint.
2755+
2756+
The ``<x>`` parameter specifies the maximum number of work groups in the x dimension.
2757+
Similarly ``<y>`` and ``<z>`` are for the y and z dimensions respectively.
2758+
Each of the three values must be greater than 0 when provided. The ``<x>`` parameter
2759+
is required, while ``<y>`` and ``<z>`` are optional with default value of 1.
2760+
2761+
If specified, the AMDGPU target backend might be able to produce better machine
2762+
code.
2763+
2764+
An error will be given if:
2765+
- Specified values violate subtarget specifications;
2766+
- Specified values are not compatible with values provided through other
2767+
attributes.
2768+
}];
2769+
}
2770+
27442771
def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> {
27452772
let Content = [{
27462773
Clang supports several different calling conventions, depending on the target

clang/include/clang/Sema/Sema.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3911,6 +3911,16 @@ class Sema final {
39113911
void addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI,
39123912
Expr *Min, Expr *Max);
39133913

3914+
/// Create an AMDGPUMaxNumWorkGroupsAttr attribute from the per-dimension
/// argument expressions.
///
/// \p XExpr is the required x-dimension argument; \p YExpr and \p ZExpr may
/// be null when the optional y/z arguments were omitted.
///
/// \returns the newly allocated attribute, or nullptr if argument validation
/// failed.
3915+
AMDGPUMaxNumWorkGroupsAttr *
3916+
CreateAMDGPUMaxNumWorkGroupsAttr(const AttributeCommonInfo &CI, Expr *XExpr,
3917+
Expr *YExpr, Expr *ZExpr);
3918+
3919+
/// addAMDGPUMaxNumWorkGroupsAttr - Adds an amdgpu_max_num_work_groups
3920+
/// attribute to a particular declaration.
///
/// The attribute is attached to \p D only when it can be created from the
/// given expressions; otherwise \p D is left unchanged. \p YExpr and
/// \p ZExpr may be null when the optional arguments were omitted.
3921+
void addAMDGPUMaxNumWorkGroupsAttr(Decl *D, const AttributeCommonInfo &CI,
3922+
Expr *XExpr, Expr *YExpr, Expr *ZExpr);
3923+
39143924
DLLImportAttr *mergeDLLImportAttr(Decl *D, const AttributeCommonInfo &CI);
39153925
DLLExportAttr *mergeDLLExportAttr(Decl *D, const AttributeCommonInfo &CI);
39163926
MSInheritanceAttr *mergeMSInheritanceAttr(Decl *D,

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,29 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
356356
if (NumVGPR != 0)
357357
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
358358
}
359+
360+
// Forward amdgpu_max_num_work_groups to the IR function as the string
// attribute "amdgpu-max-num-workgroups" with the value "X,Y,Z".
if (const auto *Attr = FD->getAttr<AMDGPUMaxNumWorkGroupsAttr>()) {
  // Folds one dimension expression to its integer value.
  const auto EvalDim = [&M](const auto *DimExpr) -> uint32_t {
    return DimExpr->EvaluateKnownConstInt(M.getContext()).getExtValue();
  };

  const uint32_t X = EvalDim(Attr->getMaxNumWorkGroupsX());

  // Y and Z are optional; an omitted dimension is emitted as 1.
  const auto *YDim = Attr->getMaxNumWorkGroupsY();
  const uint32_t Y = YDim ? EvalDim(YDim) : 1;
  const auto *ZDim = Attr->getMaxNumWorkGroupsZ();
  const uint32_t Z = ZDim ? EvalDim(ZDim) : 1;

  llvm::SmallString<32> AttrVal;
  llvm::raw_svector_ostream OS(AttrVal);
  OS << X << ',' << Y << ',' << Z;

  F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str());
}
359382
}
360383

361384
/// Emits control constants used to change per-architecture behaviour in the

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8079,6 +8079,65 @@ static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
80798079
D->addAttr(::new (S.Context) AMDGPUNumVGPRAttr(S.Context, AL, NumVGPR));
80808080
}
80818081

8082+
static bool
8083+
checkAMDGPUMaxNumWorkGroupsArguments(Sema &S, Expr *XExpr, Expr *YExpr,
8084+
Expr *ZExpr,
8085+
const AMDGPUMaxNumWorkGroupsAttr &Attr) {
8086+
if (S.DiagnoseUnexpandedParameterPack(XExpr) ||
8087+
(YExpr && S.DiagnoseUnexpandedParameterPack(YExpr)) ||
8088+
(ZExpr && S.DiagnoseUnexpandedParameterPack(ZExpr)))
8089+
return true;
8090+
8091+
// Accept template arguments for now as they depend on something else.
8092+
// We'll get to check them when they eventually get instantiated.
8093+
if (XExpr->isValueDependent() || (YExpr && YExpr->isValueDependent()) ||
8094+
(ZExpr && ZExpr->isValueDependent()))
8095+
return false;
8096+
8097+
uint32_t NumWG = 0;
8098+
Expr *Exprs[3] = {XExpr, YExpr, ZExpr};
8099+
for (int i = 0; i < 3; i++) {
8100+
if (Exprs[i]) {
8101+
if (!checkUInt32Argument(S, Attr, Exprs[i], NumWG, i,
8102+
/*StrictlyUnsigned=*/true))
8103+
return true;
8104+
if (NumWG == 0) {
8105+
S.Diag(Attr.getLoc(), diag::err_attribute_argument_is_zero)
8106+
<< &Attr << Exprs[i]->getSourceRange();
8107+
return true;
8108+
}
8109+
}
8110+
}
8111+
8112+
return false;
8113+
}
8114+
8115+
/// Build an AMDGPUMaxNumWorkGroupsAttr from the given dimension expressions.
/// Returns nullptr when the arguments do not pass validation.
AMDGPUMaxNumWorkGroupsAttr *
Sema::CreateAMDGPUMaxNumWorkGroupsAttr(const AttributeCommonInfo &CI,
                                       Expr *XExpr, Expr *YExpr, Expr *ZExpr) {
  // Stack-local attribute handed to the checker as the diagnostic subject.
  AMDGPUMaxNumWorkGroupsAttr Probe(Context, CI, XExpr, YExpr, ZExpr);

  const bool Rejected =
      checkAMDGPUMaxNumWorkGroupsArguments(*this, XExpr, YExpr, ZExpr, Probe);
  if (Rejected)
    return nullptr;

  return ::new (Context)
      AMDGPUMaxNumWorkGroupsAttr(Context, CI, XExpr, YExpr, ZExpr);
}
8126+
8127+
/// Attach an amdgpu_max_num_work_groups attribute to \p D. Nothing is added
/// when the attribute cannot be created from the given expressions.
void Sema::addAMDGPUMaxNumWorkGroupsAttr(Decl *D, const AttributeCommonInfo &CI,
                                         Expr *XExpr, Expr *YExpr,
                                         Expr *ZExpr) {
  AMDGPUMaxNumWorkGroupsAttr *NewAttr =
      CreateAMDGPUMaxNumWorkGroupsAttr(CI, XExpr, YExpr, ZExpr);
  if (!NewAttr)
    return;
  D->addAttr(NewAttr);
}
8133+
8134+
/// Handle a parsed amdgpu_max_num_work_groups attribute: gather its one to
/// three argument expressions and forward them to Sema. Arguments that were
/// not written are passed as null.
static void handleAMDGPUMaxNumWorkGroupsAttr(Sema &S, Decl *D,
                                             const ParsedAttr &AL) {
  const unsigned NumArgs = AL.getNumArgs();

  Expr *XExpr = AL.getArgAsExpr(0);
  Expr *YExpr = nullptr;
  Expr *ZExpr = nullptr;
  if (NumArgs > 1)
    YExpr = AL.getArgAsExpr(1);
  if (NumArgs > 2)
    ZExpr = AL.getArgAsExpr(2);

  S.addAMDGPUMaxNumWorkGroupsAttr(D, AL, XExpr, YExpr, ZExpr);
}
8140+
80828141
static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D,
80838142
const ParsedAttr &AL) {
80848143
// If we try to apply it to a function pointer, don't warn, but don't
@@ -9183,6 +9242,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
91839242
case ParsedAttr::AT_AMDGPUNumVGPR:
91849243
handleAMDGPUNumVGPRAttr(S, D, AL);
91859244
break;
9245+
case ParsedAttr::AT_AMDGPUMaxNumWorkGroups:
9246+
handleAMDGPUMaxNumWorkGroupsAttr(S, D, AL);
9247+
break;
91869248
case ParsedAttr::AT_AVRSignal:
91879249
handleAVRSignalAttr(S, D, AL);
91889250
break;

clang/lib/Sema/SemaTemplateInstantiateDecl.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,29 @@ static void instantiateDependentAMDGPUWavesPerEUAttr(
607607
S.addAMDGPUWavesPerEUAttr(New, Attr, MinExpr, MaxExpr);
608608
}
609609

610+
/// Instantiate an amdgpu_max_num_work_groups attribute on \p New by
/// substituting \p TemplateArgs into each of its dimension expressions.
///
/// The x dimension is required. The y and z dimensions are optional and are
/// stored as null expressions when omitted; a null dimension is forwarded
/// as-is rather than being treated as a substitution failure, so that
/// `amdgpu_max_num_work_groups(N)` on a template kernel is not silently lost.
///
/// If substitution of any dimension that is present fails, no attribute is
/// added.
static void instantiateDependentAMDGPUMaxNumWorkGroupsAttr(
    Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
    const AMDGPUMaxNumWorkGroupsAttr &Attr, Decl *New) {
  EnterExpressionEvaluationContext ConstantEvalCtx(
      S, Sema::ExpressionEvaluationContext::ConstantEvaluated);

  ExprResult ResultX = S.SubstExpr(Attr.getMaxNumWorkGroupsX(), TemplateArgs);
  if (!ResultX.isUsable())
    return;

  // Substitutes an optional dimension. Returns false only on a genuine
  // substitution failure; an omitted (null) dimension succeeds and leaves
  // Out null. A null input must not reach the isUsable() test below, because
  // a null result is reported as not usable.
  auto SubstOptionalDim = [&](Expr *Dim, Expr *&Out) -> bool {
    Out = nullptr;
    if (!Dim)
      return true;
    ExprResult Result = S.SubstExpr(Dim, TemplateArgs);
    if (!Result.isUsable())
      return false;
    Out = Result.getAs<Expr>();
    return true;
  };

  Expr *YExpr = nullptr;
  if (!SubstOptionalDim(Attr.getMaxNumWorkGroupsY(), YExpr))
    return;
  Expr *ZExpr = nullptr;
  if (!SubstOptionalDim(Attr.getMaxNumWorkGroupsZ(), ZExpr))
    return;

  S.addAMDGPUMaxNumWorkGroupsAttr(New, Attr, ResultX.getAs<Expr>(), YExpr,
                                  ZExpr);
}
632+
610633
// This doesn't take any template parameters, but we have a custom action that
611634
// needs to happen when the kernel itself is instantiated. We need to run the
612635
// ItaniumMangler to mark the names required to name this kernel.
@@ -792,6 +815,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
792815
*AMDGPUFlatWorkGroupSize, New);
793816
}
794817

818+
if (const auto *AMDGPUMaxNumWorkGroups =
819+
dyn_cast<AMDGPUMaxNumWorkGroupsAttr>(TmplAttr)) {
820+
instantiateDependentAMDGPUMaxNumWorkGroupsAttr(
821+
*this, TemplateArgs, *AMDGPUMaxNumWorkGroups, New);
822+
}
823+
795824
if (const auto *ParamAttr = dyn_cast<HLSLParamModifierAttr>(TmplAttr)) {
796825
instantiateDependentHLSLParamModifierAttr(*this, TemplateArgs, ParamAttr,
797826
New);

clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,18 +40,53 @@ __attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics
4040
__global__ void num_vgpr_64() {
4141
// CHECK: define{{.*}} amdgpu_kernel void @_Z11num_vgpr_64v() [[NUM_VGPR_64:#[0-9]+]]
4242
}
43+
// Kernels exercising amdgpu_max_num_work_groups with three, one and two
// explicit arguments. The last two are expected to share the "32,1,1"
// attribute group.
__attribute__((amdgpu_max_num_work_groups(32, 4, 2))) // expected-no-diagnostics
44+
__global__ void max_num_work_groups_32_4_2() {
45+
// CHECK: define{{.*}} amdgpu_kernel void @_Z26max_num_work_groups_32_4_2v() [[MAX_NUM_WORK_GROUPS_32_4_2:#[0-9]+]]
46+
}
47+
__attribute__((amdgpu_max_num_work_groups(32))) // expected-no-diagnostics
48+
__global__ void max_num_work_groups_32() {
49+
// CHECK: define{{.*}} amdgpu_kernel void @_Z22max_num_work_groups_32v() [[MAX_NUM_WORK_GROUPS_32_1_1:#[0-9]+]]
50+
}
51+
__attribute__((amdgpu_max_num_work_groups(32,1))) // expected-no-diagnostics
52+
__global__ void max_num_work_groups_32_1() {
53+
// CHECK: define{{.*}} amdgpu_kernel void @_Z24max_num_work_groups_32_1v() [[MAX_NUM_WORK_GROUPS_32_1_1:#[0-9]+]]
54+
}
55+
56+
57+
58+
// Template kernels where the template parameter supplies the x, y or z
// argument in turn. Each explicit instantiation is expected to end up with
// the "32,4,2" attribute group.
template<unsigned a>
59+
__attribute__((amdgpu_max_num_work_groups(a, 4, 2)))
60+
__global__ void template_a_4_2_max_num_work_groups() {}
61+
template __global__ void template_a_4_2_max_num_work_groups<32>();
62+
// CHECK: define{{.*}} amdgpu_kernel void @_Z34template_a_4_2_max_num_work_groupsILj32EEvv() [[MAX_NUM_WORK_GROUPS_32_4_2:#[0-9]+]]
63+
64+
template<unsigned a>
65+
__attribute__((amdgpu_max_num_work_groups(32, a, 2)))
66+
__global__ void template_32_a_2_max_num_work_groups() {}
67+
template __global__ void template_32_a_2_max_num_work_groups<4>();
68+
// CHECK: define{{.*}} amdgpu_kernel void @_Z35template_32_a_2_max_num_work_groupsILj4EEvv() [[MAX_NUM_WORK_GROUPS_32_4_2:#[0-9]+]]
69+
70+
template<unsigned a>
71+
__attribute__((amdgpu_max_num_work_groups(32, 4, a)))
72+
__global__ void template_32_4_a_max_num_work_groups() {}
73+
template __global__ void template_32_4_a_max_num_work_groups<2>();
74+
// CHECK: define{{.*}} amdgpu_kernel void @_Z35template_32_4_a_max_num_work_groupsILj2EEvv() [[MAX_NUM_WORK_GROUPS_32_4_2:#[0-9]+]]
4375

4476
// Make sure this is silently accepted on other targets.
4577
// NAMD-NOT: "amdgpu-flat-work-group-size"
4678
// NAMD-NOT: "amdgpu-waves-per-eu"
4779
// NAMD-NOT: "amdgpu-num-vgpr"
4880
// NAMD-NOT: "amdgpu-num-sgpr"
81+
// NAMD-NOT: "amdgpu-max-num-workgroups"
4982

5083
// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true"
5184
// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
5285
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64"
5386
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"
5487
// CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32"
5588
// CHECK-DAG: attributes [[NUM_VGPR_64]] = {{.*}}"amdgpu-num-vgpr"="64"
89+
// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_4_2]] = {{.*}}"amdgpu-max-num-workgroups"="32,4,2"
90+
// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_1_1]] = {{.*}}"amdgpu-max-num-workgroups"="32,1,1"
5691

5792
// NOUB-NOT: "uniform-work-group-size"="true"

clang/test/CodeGenOpenCL/amdgpu-attrs.cl

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,46 @@ kernel void reqd_work_group_size_32_2_1_flat_work_group_size_16_128() {
139139
// CHECK: define{{.*}} amdgpu_kernel void @reqd_work_group_size_32_2_1_flat_work_group_size_16_128() [[FLAT_WORK_GROUP_SIZE_16_128:#[0-9]+]]
140140
}
141141

142+
// Kernels exercising amdgpu_max_num_work_groups with various x/y/z values.
// The final two kernels omit z (and y); they are expected to share the
// "32,1,1" attribute group with the explicit (32, 1, 1) kernel.
__attribute__((amdgpu_max_num_work_groups(1, 1, 1))) // expected-no-diagnostics
143+
kernel void max_num_work_groups_1_1_1() {
144+
// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_1_1_1() [[MAX_NUM_WORK_GROUPS_1_1_1:#[0-9]+]]
145+
}
146+
147+
__attribute__((amdgpu_max_num_work_groups(32, 1, 1))) // expected-no-diagnostics
148+
kernel void max_num_work_groups_32_1_1() {
149+
// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_32_1_1() [[MAX_NUM_WORK_GROUPS_32_1_1:#[0-9]+]]
150+
}
151+
152+
__attribute__((amdgpu_max_num_work_groups(32, 8, 1))) // expected-no-diagnostics
153+
kernel void max_num_work_groups_32_8_1() {
154+
// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_32_8_1() [[MAX_NUM_WORK_GROUPS_32_8_1:#[0-9]+]]
155+
}
156+
157+
__attribute__((amdgpu_max_num_work_groups(1, 1, 32))) // expected-no-diagnostics
158+
kernel void max_num_work_groups_1_1_32() {
159+
// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_1_1_32() [[MAX_NUM_WORK_GROUPS_1_1_32:#[0-9]+]]
160+
}
161+
162+
__attribute__((amdgpu_max_num_work_groups(1, 8, 32))) // expected-no-diagnostics
163+
kernel void max_num_work_groups_1_8_32() {
164+
// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_1_8_32() [[MAX_NUM_WORK_GROUPS_1_8_32:#[0-9]+]]
165+
}
166+
167+
__attribute__((amdgpu_max_num_work_groups(4, 8, 32))) // expected-no-diagnostics
168+
kernel void max_num_work_groups_4_8_32() {
169+
// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_4_8_32() [[MAX_NUM_WORK_GROUPS_4_8_32:#[0-9]+]]
170+
}
171+
172+
__attribute__((amdgpu_max_num_work_groups(32))) // expected-no-diagnostics
173+
kernel void max_num_work_groups_32() {
174+
// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_32() [[MAX_NUM_WORK_GROUPS_32_1_1:#[0-9]+]]
175+
}
176+
177+
__attribute__((amdgpu_max_num_work_groups(32,1))) // expected-no-diagnostics
178+
kernel void max_num_work_groups_32_1() {
179+
// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_32_1() [[MAX_NUM_WORK_GROUPS_32_1_1:#[0-9]+]]
180+
}
181+
142182
void a_function() {
143183
// CHECK: define{{.*}} void @a_function() [[A_FUNCTION:#[0-9]+]]
144184
}
@@ -189,5 +229,12 @@ kernel void default_kernel() {
189229
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
190230
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
191231

232+
// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_1_1_1]] = {{.*}} "amdgpu-max-num-workgroups"="1,1,1"
233+
// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_1_1]] = {{.*}} "amdgpu-max-num-workgroups"="32,1,1"
234+
// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_8_1]] = {{.*}} "amdgpu-max-num-workgroups"="32,8,1"
235+
// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_1_1_32]] = {{.*}} "amdgpu-max-num-workgroups"="1,1,32"
236+
// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_1_8_32]] = {{.*}} "amdgpu-max-num-workgroups"="1,8,32"
237+
// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_4_8_32]] = {{.*}} "amdgpu-max-num-workgroups"="4,8,32"
238+
192239
// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}}
193240
// CHECK-DAG: attributes [[DEFAULT_KERNEL_ATTRS]] = {{.*}} "amdgpu-flat-work-group-size"="1,256"

clang/test/Misc/pragma-attribute-supported-attributes-list.test

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
// CHECK: #pragma clang attribute supports the following attributes:
66
// CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function)
7+
// CHECK-NEXT: AMDGPUMaxNumWorkGroups (SubjectMatchRule_function)
78
// CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function)
89
// CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function)
910
// CHECK-NEXT: AMDGPUWavesPerEU (SubjectMatchRule_function)

0 commit comments

Comments
 (0)