Skip to content

Commit 0b40f97

Browse files
authored
AMDGPU: Treat uint32_max as the default value for amdgpu-max-num-workgroups (#113751)
0 does not make sense as a value for this to be, much less the default. Also stop emitting each individual field if it is the default, rather than if any element was the default. Also fix the name of the test since it didn't exactly match the real attribute name.
1 parent 0c60573 commit 0b40f97

File tree

6 files changed

+77
-13
lines changed

6 files changed

+77
-13
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1645,9 +1645,9 @@ The AMDGPU backend supports the following LLVM IR attributes.
16451645
reduced by heuristics.
16461646

16471647
"amdgpu-max-num-workgroups"="x,y,z" Specify the maximum number of work groups for the kernel dispatch in the
1648-
X, Y, and Z dimensions. Generated by the ``amdgpu_max_num_work_groups``
1649-
CLANG attribute [CLANG-ATTR]_. Clang only emits this attribute when all
1650-
the three numbers are >= 1.
1648+
X, Y, and Z dimensions. Each number must be >= 1. Generated by the
1649+
``amdgpu_max_num_work_groups`` CLANG attribute [CLANG-ATTR]_. Clang only
1650+
emits this attribute when all the three numbers are >= 1.
16511651

16521652
"amdgpu-no-agpr" Indicates the function will not require allocating AGPRs. This is only
16531653
relevant on subtargets with AGPRs. The behavior is undefined if a

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -504,14 +504,21 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
504504

505505
Kern[".max_flat_workgroup_size"] =
506506
Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
507-
unsigned NumWGX = MFI.getMaxNumWorkGroupsX();
508-
unsigned NumWGY = MFI.getMaxNumWorkGroupsY();
509-
unsigned NumWGZ = MFI.getMaxNumWorkGroupsZ();
510-
if (NumWGX != 0 && NumWGY != 0 && NumWGZ != 0) {
507+
508+
uint32_t NumWGY = MFI.getMaxNumWorkGroupsY();
509+
uint32_t NumWGZ = MFI.getMaxNumWorkGroupsZ();
510+
uint32_t NumWGX = MFI.getMaxNumWorkGroupsX();
511+
512+
// TODO: Should consider 0 invalid and reject in IR verifier.
513+
if (NumWGX != std::numeric_limits<uint32_t>::max() && NumWGX != 0)
511514
Kern[".max_num_workgroups_x"] = Kern.getDocument()->getNode(NumWGX);
515+
516+
if (NumWGY != std::numeric_limits<uint32_t>::max() && NumWGY != 0)
512517
Kern[".max_num_workgroups_y"] = Kern.getDocument()->getNode(NumWGY);
518+
519+
if (NumWGZ != std::numeric_limits<uint32_t>::max() && NumWGZ != 0)
513520
Kern[".max_num_workgroups_z"] = Kern.getDocument()->getNode(NumWGZ);
514-
}
521+
515522
Kern[".sgpr_spill_count"] =
516523
Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
517524
Kern[".vgpr_spill_count"] =

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,5 +371,6 @@ const AMDGPUSubtarget &AMDGPUSubtarget::get(const TargetMachine &TM, const Funct
371371

372372
SmallVector<unsigned>
373373
AMDGPUSubtarget::getMaxNumWorkGroups(const Function &F) const {
374-
return AMDGPU::getIntegerVecAttribute(F, "amdgpu-max-num-workgroups", 3);
374+
return AMDGPU::getIntegerVecAttribute(F, "amdgpu-max-num-workgroups", 3,
375+
std::numeric_limits<uint32_t>::max());
375376
}

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1307,15 +1307,16 @@ getIntegerPairAttribute(const Function &F, StringRef Name,
13071307
}
13081308

13091309
SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
1310-
unsigned Size) {
1310+
unsigned Size,
1311+
unsigned DefaultVal) {
13111312
assert(Size > 2);
1312-
SmallVector<unsigned> Default(Size, 0);
1313+
SmallVector<unsigned> Default(Size, DefaultVal);
13131314

13141315
Attribute A = F.getFnAttribute(Name);
13151316
if (!A.isStringAttribute())
13161317
return Default;
13171318

1318-
SmallVector<unsigned> Vals(Size, 0);
1319+
SmallVector<unsigned> Vals(Size, DefaultVal);
13191320

13201321
LLVMContext &Ctx = F.getContext();
13211322

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -919,7 +919,8 @@ getIntegerPairAttribute(const Function &F, StringRef Name,
919919
///
920920
/// \returns false if any error occurs.
921921
SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
922-
unsigned Size);
922+
unsigned Size,
923+
unsigned DefaultVal = 0);
923924

924925
/// Represents the counter values to wait for in an s_waitcnt instruction.
925926
///

llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups.ll renamed to llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups.ll

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,32 @@ entry:
4646
attributes #4 = {"amdgpu-max-num-workgroups"="1024,1024,1024"}
4747

4848

49+
50+
; Ignore if number of work groups for x dimension is 0.
51+
; CHECK-LABEL: {{^}}empty_max_num_workgroups_x_max:
52+
define amdgpu_kernel void @empty_max_num_workgroups_x_max() #5 {
53+
entry:
54+
ret void
55+
}
56+
attributes #5 = {"amdgpu-max-num-workgroups"="4294967295,2,3"}
57+
58+
; Ignore if number of work groups for y dimension is 0.
59+
; CHECK-LABEL: {{^}}empty_max_num_workgroups_y_max:
60+
define amdgpu_kernel void @empty_max_num_workgroups_y_max() #6 {
61+
entry:
62+
ret void
63+
}
64+
attributes #6 = {"amdgpu-max-num-workgroups"="1,4294967295,3"}
65+
66+
; Ignore if number of work groups for z dimension is 0.
67+
; CHECK-LABEL: {{^}}empty_max_num_workgroups_z_max:
68+
define amdgpu_kernel void @empty_max_num_workgroups_z_max() #7 {
69+
entry:
70+
ret void
71+
}
72+
attributes #7 = {"amdgpu-max-num-workgroups"="1,2,4294967295"}
73+
74+
4975
; CHECK: .amdgpu_metadata
5076
; CHECK: - .args:
5177
; CHECK: .max_flat_workgroup_size: 1024
@@ -54,16 +80,22 @@ attributes #4 = {"amdgpu-max-num-workgroups"="1024,1024,1024"}
5480

5581
; CHECK: - .args:
5682
; CHECK: .max_flat_workgroup_size: 1024
83+
; CHECK-NEXT: .max_num_workgroups_y: 2
84+
; CHECK-NEXT: .max_num_workgroups_z: 3
5785
; CHECK-NEXT: .name: empty_max_num_workgroups_x0
5886
; CHECK-NEXT: .private_segment_fixed_size: 0
5987

6088
; CHECK: - .args:
6189
; CHECK: .max_flat_workgroup_size: 1024
90+
; CHECK-NEXT: .max_num_workgroups_x: 1
91+
; CHECK-NEXT: .max_num_workgroups_z: 3
6292
; CHECK-NEXT: .name: empty_max_num_workgroups_y0
6393
; CHECK-NEXT: .private_segment_fixed_size: 0
6494

6595
; CHECK: - .args:
6696
; CHECK: .max_flat_workgroup_size: 1024
97+
; CHECK-NEXT: .max_num_workgroups_x: 1
98+
; CHECK-NEXT: .max_num_workgroups_y: 2
6799
; CHECK-NEXT: .name: empty_max_num_workgroups_z0
68100
; CHECK-NEXT: .private_segment_fixed_size: 0
69101

@@ -82,3 +114,25 @@ attributes #4 = {"amdgpu-max-num-workgroups"="1024,1024,1024"}
82114
; CHECK-NEXT: .max_num_workgroups_z: 1024
83115
; CHECK-NEXT: .name: empty_max_num_workgroups_1024_1024_1024
84116
; CHECK-NEXT: .private_segment_fixed_size: 0
117+
118+
119+
; CHECK: - .args:
120+
; CHECK: .max_flat_workgroup_size: 1024
121+
; CHECK-NEXT: .max_num_workgroups_y: 2
122+
; CHECK-NEXT: .max_num_workgroups_z: 3
123+
; CHECK-NEXT: .name: empty_max_num_workgroups_x_max
124+
; CHECK-NEXT: .private_segment_fixed_size: 0
125+
126+
; CHECK: - .args:
127+
; CHECK: .max_flat_workgroup_size: 1024
128+
; CHECK-NEXT: .max_num_workgroups_x: 1
129+
; CHECK-NEXT: .max_num_workgroups_z: 3
130+
; CHECK-NEXT: .name: empty_max_num_workgroups_y_max
131+
; CHECK-NEXT: .private_segment_fixed_size: 0
132+
133+
; CHECK: - .args:
134+
; CHECK: .max_flat_workgroup_size: 1024
135+
; CHECK-NEXT: .max_num_workgroups_x: 1
136+
; CHECK-NEXT: .max_num_workgroups_y: 2
137+
; CHECK-NEXT: .name: empty_max_num_workgroups_z_max
138+
; CHECK-NEXT: .private_segment_fixed_size: 0

0 commit comments

Comments
 (0)