Skip to content

Commit 5104519

Browse files
author
git apple-llvm automerger
committed
Merge commit '1b347223023d' from llvm.org/main into next
2 parents feab090 + 1b34722 commit 5104519

File tree

2 files changed

+33
-8
lines changed

2 files changed

+33
-8
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -191,17 +191,17 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getEffectiveWavesPerEU(
191191
getOccupancyWithWorkGroupSizes(LDSBytes, FlatWorkGroupSizes).second};
192192
Default.first = std::min(Default.first, Default.second);
193193

194-
// Make sure requested minimum is less than requested maximum.
195-
if (RequestedWavesPerEU.second &&
196-
RequestedWavesPerEU.first > RequestedWavesPerEU.second)
197-
return Default;
198-
199-
// Make sure requested values do not violate subtarget's specifications and
200-
// are compatible with values implied by minimum/maximum flat workgroup sizes.
194+
// Make sure requested minimum is within the default range and no higher than
195+
// the requested maximum. The latter must not violate target specification.
201196
if (RequestedWavesPerEU.first < Default.first ||
202-
RequestedWavesPerEU.second > Default.second)
197+
RequestedWavesPerEU.first > Default.second ||
198+
RequestedWavesPerEU.first > RequestedWavesPerEU.second ||
199+
RequestedWavesPerEU.second > getMaxWavesPerEU())
203200
return Default;
204201

202+
// We cannot exceed maximum occupancy implied by flat workgroup size and LDS.
203+
RequestedWavesPerEU.second =
204+
std::min(RequestedWavesPerEU.second, Default.second);
205205
return RequestedWavesPerEU;
206206
}
207207

llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,3 +200,28 @@ entry:
200200
ret void
201201
}
202202
attributes #10 = {"amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2,2"}
203+
204+
; Minimum 2 waves, maximum limited by LDS usage.
205+
; CHECK-LABEL: {{^}}empty_at_least_2_lds_limited:
206+
; CHECK: SGPRBlocks: 12
207+
; CHECK: VGPRBlocks: 12
208+
; CHECK: NumSGPRsForWavesPerEU: 102
209+
; CHECK: NumVGPRsForWavesPerEU: 49
210+
define amdgpu_kernel void @empty_at_least_2_lds_limited() #11 {
211+
entry:
212+
ret void
213+
}
214+
attributes #11 = {"amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" "amdgpu-lds-size"="16384"}
215+
216+
; Minimum 2 waves, maximum limited by LDS usage. Requested maximum within spec
217+
; but above achievable occupancy has no effect.
218+
; CHECK-LABEL: {{^}}empty_at_least_2_lds_limited_max_above_achievable:
219+
; CHECK: SGPRBlocks: 12
220+
; CHECK: VGPRBlocks: 12
221+
; CHECK: NumSGPRsForWavesPerEU: 102
222+
; CHECK: NumVGPRsForWavesPerEU: 49
223+
define amdgpu_kernel void @empty_at_least_2_lds_limited_max_above_achievable() #12 {
224+
entry:
225+
ret void
226+
}
227+
attributes #12 = {"amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2,10" "amdgpu-lds-size"="16384"}

0 commit comments

Comments
 (0)