1
- ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=STDERR %s
1
+ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefix=STDERR %s
2
2
; RUN: FileCheck -check-prefix=REMARK %s < %t
3
3
4
4
; STDERR: remark: foo.cl:27:0: Function Name: test_kernel
5
5
; STDERR-NEXT: remark: foo.cl:27:0: SGPRs: 28
6
6
; STDERR-NEXT: remark: foo.cl:27:0: VGPRs: 9
7
7
; STDERR-NEXT: remark: foo.cl:27:0: AGPRs: 43
8
8
; STDERR-NEXT: remark: foo.cl:27:0: ScratchSize [bytes/lane]: 0
9
+ ; STDERR-NEXT: remark: foo.cl:27:0: Dynamic Stack: False
9
10
; STDERR-NEXT: remark: foo.cl:27:0: Occupancy [waves/SIMD]: 5
10
11
; STDERR-NEXT: remark: foo.cl:27:0: SGPRs Spill: 0
11
12
; STDERR-NEXT: remark: foo.cl:27:0: VGPRs Spill: 0
55
56
; REMARK-NEXT: Args:
56
57
; REMARK-NEXT: - String: ' ScratchSize [bytes/lane]: '
57
58
; REMARK-NEXT: - ScratchSize: '0'
58
- ; REMARK-NEXT: ...
59
+ ; REMARK-NEXT: ...
60
+ ; REMARK-NEXT: --- !Analysis
61
+ ; REMARK-NEXT: Pass: kernel-resource-usage
62
+ ; REMARK-NEXT: Name: DynamicStack
63
+ ; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
64
+ ; REMARK-NEXT: Function: test_kernel
65
+ ; REMARK-NEXT: Args:
66
+ ; REMARK-NEXT: - String: ' Dynamic Stack: '
67
+ ; REMARK-NEXT: - DynamicStack: 'False'
68
+ ; REMARK-NEXT: ...
59
69
; REMARK-NEXT: --- !Analysis
60
70
; REMARK-NEXT: Pass: kernel-resource-usage
61
71
; REMARK-NEXT: Name: Occupancy
@@ -108,6 +118,7 @@ define amdgpu_kernel void @test_kernel() !dbg !3 {
108
118
; STDERR-NEXT: remark: foo.cl:42:0: VGPRs: 0
109
119
; STDERR-NEXT: remark: foo.cl:42:0: AGPRs: 0
110
120
; STDERR-NEXT: remark: foo.cl:42:0: ScratchSize [bytes/lane]: 0
121
+ ; STDERR-NEXT: remark: foo.cl:42:0: Dynamic Stack: False
111
122
; STDERR-NEXT: remark: foo.cl:42:0: Occupancy [waves/SIMD]: 0
112
123
; STDERR-NEXT: remark: foo.cl:42:0: SGPRs Spill: 0
113
124
; STDERR-NEXT: remark: foo.cl:42:0: VGPRs Spill: 0
@@ -124,6 +135,7 @@ define void @test_func() !dbg !6 {
124
135
; STDERR-NEXT: remark: foo.cl:8:0: VGPRs: 0
125
136
; STDERR-NEXT: remark: foo.cl:8:0: AGPRs: 0
126
137
; STDERR-NEXT: remark: foo.cl:8:0: ScratchSize [bytes/lane]: 0
138
+ ; STDERR-NEXT: remark: foo.cl:8:0: Dynamic Stack: False
127
139
; STDERR-NEXT: remark: foo.cl:8:0: Occupancy [waves/SIMD]: 8
128
140
; STDERR-NEXT: remark: foo.cl:8:0: SGPRs Spill: 0
129
141
; STDERR-NEXT: remark: foo.cl:8:0: VGPRs Spill: 0
@@ -137,15 +149,56 @@ define amdgpu_kernel void @empty_kernel() !dbg !7 {
137
149
; STDERR-NEXT: remark: foo.cl:52:0: VGPRs: 0
138
150
; STDERR-NEXT: remark: foo.cl:52:0: AGPRs: 0
139
151
; STDERR-NEXT: remark: foo.cl:52:0: ScratchSize [bytes/lane]: 0
152
+ ; STDERR-NEXT: remark: foo.cl:52:0: Dynamic Stack: False
140
153
; STDERR-NEXT: remark: foo.cl:52:0: Occupancy [waves/SIMD]: 0
141
154
; STDERR-NEXT: remark: foo.cl:52:0: SGPRs Spill: 0
142
155
; STDERR-NEXT: remark: foo.cl:52:0: VGPRs Spill: 0
143
156
define void @empty_func () !dbg !8 { ; non-kernel function (no amdgpu_kernel); !dbg !8 has scopeLine 52, matching the foo.cl:52 remarks checked just above
144
157
ret void ; the whole body is a single return
145
158
}
146
159
160
+ ; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call
161
+ ; STDERR-NEXT: remark: foo.cl:64:0: SGPRs: 39
162
+ ; STDERR-NEXT: remark: foo.cl:64:0: VGPRs: 32
163
+ ; STDERR-NEXT: remark: foo.cl:64:0: AGPRs: 10
164
+ ; STDERR-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0
165
+ ; STDERR-NEXT: remark: foo.cl:64:0: Dynamic Stack: True
166
+ ; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: 8
167
+ ; STDERR-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0
168
+ ; STDERR-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0
169
+ ; STDERR-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0
170
+ @gv.fptr0 = external hidden unnamed_addr addrspace (4 ) constant ptr , align 4 ; external constant holding the pointer that both indirect-call kernels load and call
171
+
172
+ define amdgpu_kernel void @test_indirect_call () !dbg !9 { ; kernel whose only work is one indirect call; the foo.cl:64 checks above expect "Dynamic Stack: True" with ScratchSize 0
173
+ %fptr = load ptr , ptr addrspace (4 ) @gv.fptr0 ; fetch the callee address from the global
174
+ call void %fptr () ; indirect call: the callee is not a named function in this module
175
+ ret void
176
+ }
177
+
178
+ ; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
179
+ ; STDERR-NEXT: remark: foo.cl:74:0: SGPRs: 39
180
+ ; STDERR-NEXT: remark: foo.cl:74:0: VGPRs: 32
181
+ ; STDERR-NEXT: remark: foo.cl:74:0: AGPRs: 10
182
+ ; STDERR-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 64
183
+ ; STDERR-NEXT: remark: foo.cl:74:0: Dynamic Stack: True
184
+ ; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: 8
185
+ ; STDERR-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0
186
+ ; STDERR-NEXT: remark: foo.cl:74:0: VGPRs Spill: 0
187
+ ; STDERR-NEXT: remark: foo.cl:74:0: LDS Size [bytes/block]: 0
188
+
189
+ declare void @llvm.memset.p5.i64 (ptr addrspace (5 ) nocapture readonly , i8 , i64 , i1 immarg) ; NOTE(review): memset stores through its pointer operand, so `readonly` on it looks inconsistent - confirm before changing, the expected remark values may depend on it
190
+
191
+ define amdgpu_kernel void @test_indirect_w_static_stack () !dbg !10 { ; kernel with a fixed-size alloca plus an indirect call; the foo.cl:74 checks above expect ScratchSize 64 and "Dynamic Stack: True"
192
+ %alloca = alloca <10 x i64 >, align 16 , addrspace (5 ) ; fixed-size stack object - presumably the "static stack" in the test name
193
+ call void @llvm.memset.p5.i64 (ptr addrspace (5 ) %alloca , i8 0 , i64 40 , i1 false ) ; writes 40 zero bytes starting at %alloca. NOTE(review): <10 x i64> is larger than 40 bytes - confirm the partial fill is intended
194
+ %fptr = load ptr , ptr addrspace (4 ) @gv.fptr0 ; fetch the callee address from @gv.fptr0
195
+ call void %fptr () ; indirect call, as in test_indirect_call
196
+ ret void
197
+ }
198
+
147
199
!llvm.dbg.cu = !{!0 } ; the module's single compile unit, !0
148
200
!llvm.module.flags = !{!2 } ; pre-existing module flag list
201
+ !llvm.module.flags = !{!11 } ; adds flag !11 (amdgpu_code_object_version). NOTE(review): !llvm.module.flags is now defined twice - confirm the two operand lists are merged, or fold them into one !{!2, !11}
149
202
150
203
!0 = distinct !DICompileUnit (language: DW_LANG_C99, file: !1 , isOptimized: true , runtimeVersion: 0 , emissionKind: FullDebug)
151
204
!1 = !DIFile (filename: "foo.cl" , directory: "/tmp" )
@@ -156,3 +209,6 @@ define void @empty_func() !dbg !8 {
156
209
!6 = distinct !DISubprogram (name: "test_func" , scope: !1 , file: !1 , type: !4 , scopeLine: 42 , flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0 ) ; scopeLine 42 is the line in the foo.cl:42 remark checks
157
210
!7 = distinct !DISubprogram (name: "empty_kernel" , scope: !1 , file: !1 , type: !4 , scopeLine: 8 , flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0 ) ; scopeLine 8 is the line in the foo.cl:8 remark checks
158
211
!8 = distinct !DISubprogram (name: "empty_func" , scope: !1 , file: !1 , type: !4 , scopeLine: 52 , flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0 ) ; scopeLine 52 is the line in the foo.cl:52 remark checks
212
+ !9 = distinct !DISubprogram (name: "test_indirect_call" , scope: !1 , file: !1 , type: !4 , scopeLine: 64 , flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0 ) ; attached to @test_indirect_call via !dbg !9; scopeLine 64 is the line in the foo.cl:64 remark checks
213
+ !10 = distinct !DISubprogram (name: "test_indirect_w_static_stack" , scope: !1 , file: !1 , type: !4 , scopeLine: 74 , flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0 ) ; attached to @test_indirect_w_static_stack via !dbg !10; scopeLine 74 is the line in the foo.cl:74 remark checks
214
+ !11 = !{i32 1 , !"amdgpu_code_object_version" , i32 500 } ; module flag node referenced from !llvm.module.flags: key "amdgpu_code_object_version", value 500
0 commit comments