Skip to content

Commit ecd2f56

Browse files
authored
[AMDGPU] Warn if 'amdgpu-waves-per-eu' target occupancy was not met (#74055)
This should make it a bit harder to miss this type of issue. The warning is only emitted when the 'amdgpu-waves-per-eu' attribute is used. See SWDEV-434482.
1 parent 641e05d commit ecd2f56

File tree

2 files changed

+27
-0
lines changed

2 files changed

+27
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -939,6 +939,17 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
939939
ProgInfo.Occupancy = STM.computeOccupancy(MF.getFunction(), ProgInfo.LDSSize,
940940
ProgInfo.NumSGPRsForWavesPerEU,
941941
ProgInfo.NumVGPRsForWavesPerEU);
942+
const auto [MinWEU, MaxWEU] =
943+
AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {0, 0}, true);
944+
if (ProgInfo.Occupancy < MinWEU) {
945+
DiagnosticInfoOptimizationFailure Diag(
946+
F, F.getSubprogram(),
947+
"failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
948+
"'" +
949+
F.getName() + "': desired occupancy was " + Twine(MinWEU) +
950+
", final occupancy is " + Twine(ProgInfo.Occupancy));
951+
F.getContext().diagnose(Diag);
952+
}
942953
}
943954

944955
static unsigned getRsrcReg(CallingConv::ID CallConv) {
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=WARN %s
2+
3+
; A flat work-group size of up to 1024 with 2560 possible threads -> occupancy should be at most 8.
4+
; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'occupancy_8_target_9': desired occupancy was 9, final occupancy is 8
5+
define amdgpu_kernel void @occupancy_8_target_9() #0 {
6+
ret void
7+
}
8+
9+
; Impossible occupancy target: 11 waves per EU are requested, but the final occupancy is 10.
10+
; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'impossible_occupancy': desired occupancy was 11, final occupancy is 10
11+
define amdgpu_kernel void @impossible_occupancy() #1 {
12+
ret void
13+
}
14+
15+
attributes #0 = { "amdgpu-flat-work-group-size"="1,1024" "amdgpu-waves-per-eu"="9" }
16+
attributes #1 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="11" }

0 commit comments

Comments
 (0)