Skip to content

Commit 80fa3f7

Browse files
committed
AMDGPU: Treat uint32_max as the default value for amdgpu-max-num-workgroups
0 does not make sense as a value for this to be, much less the default. Also stop emitting each individual field if it is the default, rather than if any element was the default. Also fix the name of the test since it didn't exactly match the real attribute name.
1 parent 09a4bcf commit 80fa3f7

File tree

5 files changed

+76
-10
lines changed

5 files changed

+76
-10
lines changed

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -504,14 +504,19 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
504504

505505
Kern[".max_flat_workgroup_size"] =
506506
Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
507-
unsigned NumWGX = MFI.getMaxNumWorkGroupsX();
508-
unsigned NumWGY = MFI.getMaxNumWorkGroupsY();
509-
unsigned NumWGZ = MFI.getMaxNumWorkGroupsZ();
510-
if (NumWGX != 0 && NumWGY != 0 && NumWGZ != 0) {
507+
508+
uint32_t NumWGX = MFI.getMaxNumWorkGroupsX();
509+
uint32_t NumWGY = MFI.getMaxNumWorkGroupsY();
510+
uint32_t NumWGZ = MFI.getMaxNumWorkGroupsZ();
511+
if (NumWGX != std::numeric_limits<uint32_t>::max())
511512
Kern[".max_num_workgroups_x"] = Kern.getDocument()->getNode(NumWGX);
513+
514+
if (NumWGY != std::numeric_limits<uint32_t>::max())
512515
Kern[".max_num_workgroups_y"] = Kern.getDocument()->getNode(NumWGY);
516+
517+
if (NumWGZ != std::numeric_limits<uint32_t>::max())
513518
Kern[".max_num_workgroups_z"] = Kern.getDocument()->getNode(NumWGZ);
514-
}
519+
515520
Kern[".sgpr_spill_count"] =
516521
Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
517522
Kern[".vgpr_spill_count"] =

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,5 +371,6 @@ const AMDGPUSubtarget &AMDGPUSubtarget::get(const TargetMachine &TM, const Funct
371371

372372
SmallVector<unsigned>
373373
AMDGPUSubtarget::getMaxNumWorkGroups(const Function &F) const {
374-
return AMDGPU::getIntegerVecAttribute(F, "amdgpu-max-num-workgroups", 3);
374+
return AMDGPU::getIntegerVecAttribute(F, "amdgpu-max-num-workgroups", 3,
375+
std::numeric_limits<uint32_t>::max());
375376
}

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1307,15 +1307,16 @@ getIntegerPairAttribute(const Function &F, StringRef Name,
13071307
}
13081308

13091309
SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
1310-
unsigned Size) {
1310+
unsigned Size,
1311+
unsigned DefaultVal) {
13111312
assert(Size > 2);
1312-
SmallVector<unsigned> Default(Size, 0);
1313+
SmallVector<unsigned> Default(Size, DefaultVal);
13131314

13141315
Attribute A = F.getFnAttribute(Name);
13151316
if (!A.isStringAttribute())
13161317
return Default;
13171318

1318-
SmallVector<unsigned> Vals(Size, 0);
1319+
SmallVector<unsigned> Vals(Size, DefaultVal);
13191320

13201321
LLVMContext &Ctx = F.getContext();
13211322

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -919,7 +919,8 @@ getIntegerPairAttribute(const Function &F, StringRef Name,
919919
///
920920
/// \returns false if any error occurs.
921921
SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
922-
unsigned Size);
922+
unsigned Size,
923+
unsigned DefaultVal = 0);
923924

924925
/// Represents the counter values to wait for in an s_waitcnt instruction.
925926
///

llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups.ll renamed to llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups.ll

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,33 @@ entry:
4646
attributes #4 = {"amdgpu-max-num-workgroups"="1024,1024,1024"}
4747

4848

49+
50+
; Ignore if number of work groups for x dimension is 0.
51+
; CHECK-LABEL: {{^}}empty_max_num_workgroups_x_max:
52+
define amdgpu_kernel void @empty_max_num_workgroups_x_max() #5 {
53+
entry:
54+
ret void
55+
}
56+
attributes #5 = {"amdgpu-max-num-workgroups"="4294967295,2,3"}
57+
58+
; Ignore if number of work groups for y dimension is 0.
59+
; CHECK-LABEL: {{^}}empty_max_num_workgroups_y_max:
60+
define amdgpu_kernel void @empty_max_num_workgroups_y_max() #6 {
61+
entry:
62+
ret void
63+
}
64+
attributes #6 = {"amdgpu-max-num-workgroups"="1,4294967295,3"}
65+
66+
; Ignore if number of work groups for z dimension is 0.
67+
; CHECK-LABEL: {{^}}empty_max_num_workgroups_z_max:
68+
define amdgpu_kernel void @empty_max_num_workgroups_z_max() #7 {
69+
entry:
70+
ret void
71+
}
72+
attributes #7 = {"amdgpu-max-num-workgroups"="1,2,4294967295"}
73+
74+
75+
4976
; CHECK: .amdgpu_metadata
5077
; CHECK: - .args:
5178
; CHECK: .max_flat_workgroup_size: 1024
@@ -54,16 +81,25 @@ attributes #4 = {"amdgpu-max-num-workgroups"="1024,1024,1024"}
5481

5582
; CHECK: - .args:
5683
; CHECK: .max_flat_workgroup_size: 1024
84+
; CHECK-NEXT: .max_num_workgroups_x: 0
85+
; CHECK-NEXT: .max_num_workgroups_y: 2
86+
; CHECK-NEXT: .max_num_workgroups_z: 3
5787
; CHECK-NEXT: .name: empty_max_num_workgroups_x0
5888
; CHECK-NEXT: .private_segment_fixed_size: 0
5989

6090
; CHECK: - .args:
6191
; CHECK: .max_flat_workgroup_size: 1024
92+
; CHECK-NEXT: .max_num_workgroups_x: 1
93+
; CHECK-NEXT: .max_num_workgroups_y: 0
94+
; CHECK-NEXT: .max_num_workgroups_z: 3
6295
; CHECK-NEXT: .name: empty_max_num_workgroups_y0
6396
; CHECK-NEXT: .private_segment_fixed_size: 0
6497

6598
; CHECK: - .args:
6699
; CHECK: .max_flat_workgroup_size: 1024
100+
; CHECK-NEXT: .max_num_workgroups_x: 1
101+
; CHECK-NEXT: .max_num_workgroups_y: 2
102+
; CHECK-NEXT: .max_num_workgroups_z: 0
67103
; CHECK-NEXT: .name: empty_max_num_workgroups_z0
68104
; CHECK-NEXT: .private_segment_fixed_size: 0
69105

@@ -82,3 +118,25 @@ attributes #4 = {"amdgpu-max-num-workgroups"="1024,1024,1024"}
82118
; CHECK-NEXT: .max_num_workgroups_z: 1024
83119
; CHECK-NEXT: .name: empty_max_num_workgroups_1024_1024_1024
84120
; CHECK-NEXT: .private_segment_fixed_size: 0
121+
122+
123+
; CHECK: - .args:
124+
; CHECK: .max_flat_workgroup_size: 1024
125+
; CHECK-NEXT: .max_num_workgroups_y: 2
126+
; CHECK-NEXT: .max_num_workgroups_z: 3
127+
; CHECK-NEXT: .name: empty_max_num_workgroups_x_max
128+
; CHECK-NEXT: .private_segment_fixed_size: 0
129+
130+
; CHECK: - .args:
131+
; CHECK: .max_flat_workgroup_size: 1024
132+
; CHECK-NEXT: .max_num_workgroups_x: 1
133+
; CHECK-NEXT: .max_num_workgroups_z: 3
134+
; CHECK-NEXT: .name: empty_max_num_workgroups_y_max
135+
; CHECK-NEXT: .private_segment_fixed_size: 0
136+
137+
; CHECK: - .args:
138+
; CHECK: .max_flat_workgroup_size: 1024
139+
; CHECK-NEXT: .max_num_workgroups_x: 1
140+
; CHECK-NEXT: .max_num_workgroups_y: 2
141+
; CHECK-NEXT: .name: empty_max_num_workgroups_z_max
142+
; CHECK-NEXT: .private_segment_fixed_size: 0

0 commit comments

Comments
 (0)