Skip to content

Commit 8cb2508

Browse files
estebanpadilla authored and facebook-github-bot committed
RegCount NITER calculation (#4159)
Summary: Pull Request resolved: #4159

This adds an internal implementation of https://github.com/microsoft/ArchProbe. This stack introduces a kernel that can be used to get the number of available registers on a mobile GPU by gradually increasing the number of accessed elements and detecting dramatic drops in performance. See [this paper](https://www.microsoft.com/en-us/research/uploads/prod/2022/02/mobigpu_mobicom22_camera.pdf), page 4, for more information.

This first diff gets the number of iterations (NITER) that can run in 1000us, to be used in the following tests. The kernel looks like the following for any K number of registers:

    float reg_data0 = float(niter) + 0;
    float reg_data1 = float(niter) + 1;
    ...
    float reg_dataK = float(niter) + K;
    int i = 0;
    for (; i < niter; ++i) {
      reg_data0 *= reg_dataK;
      reg_data1 *= reg_data0;
      reg_data2 *= reg_data1;
      ...
      reg_dataK *= reg_data(K-1);
    }
    i = i >> 31;
    buffer_out.data[0 * i] = reg_data0;
    buffer_out.data[1 * i] = reg_data1;
    ...
    buffer_out.data[K * i] = reg_dataK;

Differential Revision: D59405012
1 parent 074a81e commit 8cb2508

File tree

7 files changed

+736
-0
lines changed

7 files changed

+736
-0
lines changed

backends/vulkan/runtime/vk_api/QueryPool.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,5 +248,22 @@ unsigned long QueryPool::get_total_shader_ns(std::string kernel_name) {
248248
}
249249
return 0;
250250
}
251+
252+
unsigned long QueryPool::get_mean_shader_ns(std::string kernel_name) {
253+
uint64_t total_ns = 0;
254+
uint32_t count = 0;
255+
for (ShaderDuration& entry : shader_durations_) {
256+
if (entry.kernel_name == kernel_name) {
257+
std::chrono::duration<size_t, std::nano> exec_duration_ns(
258+
entry.execution_duration_ns);
259+
total_ns += exec_duration_ns.count();
260+
count++;
261+
}
262+
}
263+
if (count == 0) {
264+
return 0;
265+
}
266+
return total_ns / count;
267+
}
251268
} // namespace vkapi
252269
} // namespace vkcompute

backends/vulkan/runtime/vk_api/QueryPool.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ class QueryPool final {
102102
std::string generate_string_report();
103103
void print_results();
104104
unsigned long get_total_shader_ns(std::string kernel_name);
105+
unsigned long get_mean_shader_ns(std::string kernel_name);
105106

106107
operator bool() const {
107108
return querypool_ != VK_NULL_HANDLE;

backends/vulkan/tools/gpuinfo/TARGETS

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
load("@fbcode_macros//build_defs:native_rules.bzl", "buck_filegroup")
2+
load("@fbsource//tools/build_defs:fb_xplat_cxx_binary.bzl", "fb_xplat_cxx_binary")
3+
load(
4+
"@fbsource//tools/build_defs:platform_defs.bzl",
5+
"ANDROID",
6+
)
7+
load(
8+
"@fbsource//xplat/executorch/backends/vulkan:targets.bzl",
9+
"vulkan_spv_shader_lib",
10+
)
11+
12+
# Build-file metadata. NOTE(review): presumably names the team/rotation that
# owns these targets — confirm against the build tooling documentation.
oncall("executorch")
13+
14+
# Groups every file directly under glsl/ into one publicly visible filegroup.
buck_filegroup(
    name = "gpuinfo_shaders",
    srcs = glob(["glsl/*"]),
    visibility = ["PUBLIC"],
)
23+
24+
# Shader library fed by the ":gpuinfo_shaders" filegroup.
# NOTE(review): the meaning of the "glsl" value is defined by
# vulkan_spv_shader_lib in targets.bzl — confirm there.
vulkan_spv_shader_lib(
    name = "gpuinfo_shader_lib",
    spv_filegroups = {":gpuinfo_shaders": "glsl"},
)
30+
31+
# Android-only binary built from every .cpp and .h file under this directory;
# links the gpuinfo shader library and the Vulkan graph runtime.
fb_xplat_cxx_binary(
    name = "vulkan_gpuinfo",
    srcs = glob(["**/*.cpp"]),
    headers = glob(["**/*.h"]),
    header_namespace = "/include",
    include_directories = ["/include"],
    platforms = ANDROID,
    # The same header glob is supplied as both `headers` and `raw_headers`.
    raw_headers = glob(["**/*.h"]),
    deps = [
        ":gpuinfo_shader_lib",
        "//executorch/backends/vulkan:vulkan_graph_runtime",
    ],
)
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#version 450 core
10+
11+
// PRECISION is a template placeholder of the form ${...}; presumably it is
// substituted by the shader codegen step before compilation — confirm.
#define PRECISION ${PRECISION}
12+
13+
// Make std430 the default layout for buffer blocks declared below.
layout(std430) buffer;
14+
15+
// Output buffer at descriptor set 0, binding 0: a write-only, restrict-qualified
// block holding an unsized float array.
layout(set = 0, binding = 0) buffer PRECISION restrict writeonly Buffer {
16+
float data[];
17+
}
18+
out_buff;
19+
20+
// Workgroup size is taken from specialization constants 0, 1 and 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
21+
22+
// Loop iteration count; specialization constant 3, defaulting to 1.
layout(constant_id = 3) const int NITER = 1;
23+
24+
// Benchmark kernel: declares NREG float variables, runs NITER rounds of a
// circular chain of dependent multiplies over them, then stores every variable
// to out_buff. The `$for` lines are template directives; per the commit
// summary they expand to one statement per k.
void main() {
25+
26+
// One float per k, seeded with float(NITER) + k.
$for k in range(int(NREG)):
27+
float reg_data${k} = float(NITER) + ${k};
28+
29+
// `i` is declared outside the loop because it is read again afterwards.
int i = 0;
30+
for (; i < NITER; ++i) {
31+
// reg_data0 is multiplied by the last variable, closing the chain.
reg_data0 *= reg_data${int(NREG)-1};
32+
// Each remaining variable is multiplied by its predecessor.
$for k in range(1, int(NREG)):
33+
reg_data${k} *= reg_data${k-1};
34+
}
35+
// For a non-negative i, a right shift by 31 yields 0, so every store below
// targets data[0]. NOTE(review): presumably this keeps the store index
// dependent on the loop so no variable can be optimized away — confirm.
i = i >> 31;
36+
37+
// Store every variable so each one has an observable use.
$for k in range(int(NREG)):
38+
out_buff.data[${k} * i] = reg_data${k};
39+
}

0 commit comments

Comments
 (0)