Skip to content

Commit 7a4968b

Browse files
committed
[AMDGPU] Add dynamic stack bit info to kernel-resource-usage Rpass output
In code object v5 (https://llvm.org/docs/AMDGPUUsage.html#code-object-v5-metadata) the AMDGPU backend added the .uses_dynamic_stack bit to the kernel metadata to identify kernels whose stack usage cannot be determined at compile time (mainly due to indirect function calls and recursion). This patch adds this information to the output of the kernel-resource-usage remarks. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D156040 Author: Corbin Robeck <[email protected]>
1 parent 45f9f3f commit 7a4968b

File tree

3 files changed

+67
-7
lines changed

3 files changed

+67
-7
lines changed

clang/test/Frontend/amdgcn-machine-analysis-remarks.cl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
// REQUIRES: amdgpu-registered-target
22
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null
33

4-
// expected-remark@+9 {{Function Name: foo}}
5-
// expected-remark@+8 {{ SGPRs: 13}}
6-
// expected-remark@+7 {{ VGPRs: 10}}
7-
// expected-remark@+6 {{ AGPRs: 12}}
8-
// expected-remark@+5 {{ ScratchSize [bytes/lane]: 0}}
4+
// expected-remark@+10 {{Function Name: foo}}
5+
// expected-remark@+9 {{ SGPRs: 13}}
6+
// expected-remark@+8 {{ VGPRs: 10}}
7+
// expected-remark@+7 {{ AGPRs: 12}}
8+
// expected-remark@+6 {{ ScratchSize [bytes/lane]: 0}}
9+
// expected-remark@+5 {{ Dynamic Stack: False}}
910
// expected-remark@+4 {{ Occupancy [waves/SIMD]: 10}}
1011
// expected-remark@+3 {{ SGPRs Spill: 0}}
1112
// expected-remark@+2 {{ VGPRs Spill: 0}}

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1293,6 +1293,9 @@ void AMDGPUAsmPrinter::emitResourceUsageRemarks(
12931293
EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR);
12941294
EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
12951295
CurrentProgramInfo.ScratchSize);
1296+
StringRef DynamicStackStr =
1297+
CurrentProgramInfo.DynamicCallStack ? "True" : "False";
1298+
EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr);
12961299
EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
12971300
CurrentProgramInfo.Occupancy);
12981301
EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",

llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=STDERR %s
1+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefix=STDERR %s
22
; RUN: FileCheck -check-prefix=REMARK %s < %t
33

44
; STDERR: remark: foo.cl:27:0: Function Name: test_kernel
55
; STDERR-NEXT: remark: foo.cl:27:0: SGPRs: 28
66
; STDERR-NEXT: remark: foo.cl:27:0: VGPRs: 9
77
; STDERR-NEXT: remark: foo.cl:27:0: AGPRs: 43
88
; STDERR-NEXT: remark: foo.cl:27:0: ScratchSize [bytes/lane]: 0
9+
; STDERR-NEXT: remark: foo.cl:27:0: Dynamic Stack: False
910
; STDERR-NEXT: remark: foo.cl:27:0: Occupancy [waves/SIMD]: 5
1011
; STDERR-NEXT: remark: foo.cl:27:0: SGPRs Spill: 0
1112
; STDERR-NEXT: remark: foo.cl:27:0: VGPRs Spill: 0
@@ -55,7 +56,16 @@
5556
; REMARK-NEXT: Args:
5657
; REMARK-NEXT: - String: ' ScratchSize [bytes/lane]: '
5758
; REMARK-NEXT: - ScratchSize: '0'
58-
; REMARK-NEXT: ...
59+
; REMARK-NEXT: ..
60+
; REMARK-NEXT: --- !Analysis
61+
; REMARK-NEXT: Pass: kernel-resource-usage
62+
; REMARK-NEXT: Name: DynamicStack
63+
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
64+
; REMARK-NEXT: Function: test_kernel
65+
; REMARK-NEXT: Args:
66+
; REMARK-NEXT: - String: ' Dynamic Stack:
67+
; REMARK-NEXT: - DynamicStack: 'False'
68+
; REMARK-NEXT: ..
5969
; REMARK-NEXT: --- !Analysis
6070
; REMARK-NEXT: Pass: kernel-resource-usage
6171
; REMARK-NEXT: Name: Occupancy
@@ -108,6 +118,7 @@ define amdgpu_kernel void @test_kernel() !dbg !3 {
108118
; STDERR-NEXT: remark: foo.cl:42:0: VGPRs: 0
109119
; STDERR-NEXT: remark: foo.cl:42:0: AGPRs: 0
110120
; STDERR-NEXT: remark: foo.cl:42:0: ScratchSize [bytes/lane]: 0
121+
; STDERR-NEXT: remark: foo.cl:42:0: Dynamic Stack: False
111122
; STDERR-NEXT: remark: foo.cl:42:0: Occupancy [waves/SIMD]: 0
112123
; STDERR-NEXT: remark: foo.cl:42:0: SGPRs Spill: 0
113124
; STDERR-NEXT: remark: foo.cl:42:0: VGPRs Spill: 0
@@ -124,6 +135,7 @@ define void @test_func() !dbg !6 {
124135
; STDERR-NEXT: remark: foo.cl:8:0: VGPRs: 0
125136
; STDERR-NEXT: remark: foo.cl:8:0: AGPRs: 0
126137
; STDERR-NEXT: remark: foo.cl:8:0: ScratchSize [bytes/lane]: 0
138+
; STDERR-NEXT: remark: foo.cl:8:0: Dynamic Stack: False
127139
; STDERR-NEXT: remark: foo.cl:8:0: Occupancy [waves/SIMD]: 8
128140
; STDERR-NEXT: remark: foo.cl:8:0: SGPRs Spill: 0
129141
; STDERR-NEXT: remark: foo.cl:8:0: VGPRs Spill: 0
@@ -137,15 +149,56 @@ define amdgpu_kernel void @empty_kernel() !dbg !7 {
137149
; STDERR-NEXT: remark: foo.cl:52:0: VGPRs: 0
138150
; STDERR-NEXT: remark: foo.cl:52:0: AGPRs: 0
139151
; STDERR-NEXT: remark: foo.cl:52:0: ScratchSize [bytes/lane]: 0
152+
; STDERR-NEXT: remark: foo.cl:52:0: Dynamic Stack: False
140153
; STDERR-NEXT: remark: foo.cl:52:0: Occupancy [waves/SIMD]: 0
141154
; STDERR-NEXT: remark: foo.cl:52:0: SGPRs Spill: 0
142155
; STDERR-NEXT: remark: foo.cl:52:0: VGPRs Spill: 0
143156
define void @empty_func() !dbg !8 {
144157
ret void
145158
}
146159

160+
; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call
161+
; STDERR-NEXT: remark: foo.cl:64:0: SGPRs: 39
162+
; STDERR-NEXT: remark: foo.cl:64:0: VGPRs: 32
163+
; STDERR-NEXT: remark: foo.cl:64:0: AGPRs: 10
164+
; STDERR-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0
165+
; STDERR-NEXT: remark: foo.cl:64:0: Dynamic Stack: True
166+
; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: 8
167+
; STDERR-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0
168+
; STDERR-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0
169+
; STDERR-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0
170+
@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
171+
172+
define amdgpu_kernel void @test_indirect_call() !dbg !9 {
173+
%fptr = load ptr, ptr addrspace(4) @gv.fptr0
174+
call void %fptr()
175+
ret void
176+
}
177+
178+
; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
179+
; STDERR-NEXT: remark: foo.cl:74:0: SGPRs: 39
180+
; STDERR-NEXT: remark: foo.cl:74:0: VGPRs: 32
181+
; STDERR-NEXT: remark: foo.cl:74:0: AGPRs: 10
182+
; STDERR-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 64
183+
; STDERR-NEXT: remark: foo.cl:74:0: Dynamic Stack: True
184+
; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: 8
185+
; STDERR-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0
186+
; STDERR-NEXT: remark: foo.cl:74:0: VGPRs Spill: 0
187+
; STDERR-NEXT: remark: foo.cl:74:0: LDS Size [bytes/block]: 0
188+
189+
declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture readonly, i8, i64, i1 immarg)
190+
191+
define amdgpu_kernel void @test_indirect_w_static_stack() !dbg !10 {
192+
%alloca = alloca <10 x i64>, align 16, addrspace(5)
193+
call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 40, i1 false)
194+
%fptr = load ptr, ptr addrspace(4) @gv.fptr0
195+
call void %fptr()
196+
ret void
197+
}
198+
147199
!llvm.dbg.cu = !{!0}
148200
!llvm.module.flags = !{!2}
201+
!llvm.module.flags = !{!11}
149202

150203
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
151204
!1 = !DIFile(filename: "foo.cl", directory: "/tmp")
@@ -156,3 +209,6 @@ define void @empty_func() !dbg !8 {
156209
!6 = distinct !DISubprogram(name: "test_func", scope: !1, file: !1, type: !4, scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
157210
!7 = distinct !DISubprogram(name: "empty_kernel", scope: !1, file: !1, type: !4, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
158211
!8 = distinct !DISubprogram(name: "empty_func", scope: !1, file: !1, type: !4, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
212+
!9 = distinct !DISubprogram(name: "test_indirect_call", scope: !1, file: !1, type: !4, scopeLine: 64, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
213+
!10 = distinct !DISubprogram(name: "test_indirect_w_static_stack", scope: !1, file: !1, type: !4, scopeLine: 74, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
214+
!11 = !{i32 1, !"amdgpu_code_object_version", i32 500}

0 commit comments

Comments
 (0)