Skip to content

Commit fdaace9

Browse files
Esteban Padilla Cerdio authored and facebook-github-bot committed
RegCount NITER calculation (#4159)
Summary: Pull Request resolved: #4159

This adds an internal implementation of https://github.com/microsoft/ArchProbe. This stack introduces a kernel that can be used to get the number of available registers on a mobile GPU by gradually increasing the number of accessed elements and detecting dramatic drops in performance. See [this paper](https://www.microsoft.com/en-us/research/uploads/prod/2022/02/mobigpu_mobicom22_camera.pdf), page 4, for more information.

This first diff gets the number of iterations (NITER) that can run in 1000us, to be used in the following tests. The kernel looks like the following for any K number of registers:

```
float reg_data0 = float(niter) + 0;
float reg_data1 = float(niter) + 1;
...
float reg_dataK = float(niter) + K;

int i = 0;
for (; i < niter; ++i) {
  reg_data0 *= reg_dataK;
  reg_data1 *= reg_data0;
  reg_data2 *= reg_data1;
  ...
  reg_dataK *= reg_data(K-1);
}
i = i >> 31;

buffer_out.data[0 * i] = reg_data0;
buffer_out.data[1 * i] = reg_data1;
...
buffer_out.data[K * i] = reg_dataK;
```

Differential Revision: D59405012
1 parent 3521021 commit fdaace9

File tree

8 files changed

+733
-2
lines changed

8 files changed

+733
-2
lines changed

backends/vulkan/runtime/api/vk_api/QueryPool.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,14 +239,20 @@ void QueryPool::print_results() {
239239
}
240240

241241
// Returns a shader execution time, in nanoseconds, for the entries of
// shader_durations_ whose kernel_name equals the `kernel_name` argument.
//
// This hunk is a rendered diff, so it shows two behaviours at once:
//   - lines marked `-` (old): return the duration of the FIRST matching entry,
//     or 0 when nothing matches;
//   - lines marked `+` (new): sum the durations of ALL matching entries and
//     return their integer mean (total / number of matches), or 0 when
//     nothing matches.
//
// NOTE(review): the accumulator is uint64_t but the return type is
// unsigned long, so the result narrows on targets where unsigned long is
// 32 bits wide -- confirm this is acceptable for the supported platforms.
unsigned long QueryPool::get_total_shader_ns(std::string kernel_name) {
242+
uint64_t total_ns = 0;
243+
uint32_t count = 0;
242244
for (ShaderDuration& entry : shader_durations_) {
243245
if (entry.kernel_name == kernel_name) {
244246
std::chrono::duration<size_t, std::nano> exec_duration_ns(
245247
entry.execution_duration_ns);
246-
// (removed) Old behaviour: stop at the first matching entry.
return exec_duration_ns.count();
248+
// (added) New behaviour: accumulate every matching entry.
total_ns += exec_duration_ns.count();
249+
count++;
247250
}
248251
}
249-
return 0;
252+
// Guard the division below: no matching entry means no average to report.
if (count == 0) {
253+
return 0;
254+
}
255+
// Integer division: the mean is truncated toward zero.
return total_ns / count;
250256
}
251257
} // namespace vkapi
252258
} // namespace vkcompute

backends/vulkan/tools/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/ArchProbe/

backends/vulkan/tools/gpuinfo/TARGETS

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
load("@fbcode_macros//build_defs:native_rules.bzl", "buck_filegroup")
2+
load("@fbsource//tools/build_defs:fb_xplat_cxx_binary.bzl", "fb_xplat_cxx_binary")
3+
load(
4+
"@fbsource//tools/build_defs:platform_defs.bzl",
5+
"ANDROID",
6+
)
7+
load(
8+
"@fbsource//xplat/executorch/backends/vulkan:targets.bzl",
9+
"vulkan_spv_shader_lib",
10+
)
11+
12+
oncall("executorch")
13+
14+
# File group exposing every file directly under glsl/ (non-recursive glob).
# Visibility is PUBLIC, so any target may depend on it.
buck_filegroup(
15+
name = "gpuinfo_shaders",
16+
srcs = glob([
17+
"glsl/*",
18+
]),
19+
visibility = [
20+
"PUBLIC",
21+
],
22+
)
23+
24+
# Shader library fed by the :gpuinfo_shaders file group, mapped to "glsl".
# NOTE(review): vulkan_spv_shader_lib is loaded from the vulkan targets.bzl and
# its behaviour is not visible here; presumably it compiles the GLSL templates
# to SPIR-V -- confirm against the macro definition.
vulkan_spv_shader_lib(
25+
name = "gpuinfo_shader_lib",
26+
spv_filegroups = {
27+
":gpuinfo_shaders": "glsl",
28+
},
29+
)
30+
31+
# Binary target "vulkan_gpuinfo", restricted to the ANDROID platform.
# Sources are every .cpp file under this directory (recursive glob); it
# depends on the shader library above and on the Vulkan graph runtime.
# NOTE(review): the same "**/*.h" glob is passed to both `headers` and
# `raw_headers` -- confirm both are required alongside header_namespace /
# include_directories = "/include".
fb_xplat_cxx_binary(
32+
name = "vulkan_gpuinfo",
33+
srcs = glob([
34+
"**/*.cpp",
35+
]),
36+
headers = glob([
37+
"**/*.h",
38+
]),
39+
header_namespace = "/include",
40+
include_directories = ["/include"],
41+
platforms = ANDROID,
42+
raw_headers = glob([
43+
"**/*.h",
44+
]),
45+
deps = [
46+
":gpuinfo_shader_lib",
47+
"//executorch/backends/vulkan:vulkan_graph_runtime",
48+
],
49+
)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/usr/bin/env bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
# Build the vulkan_gpuinfo binary for the Android arm64 target platform, with
# static NDK linking, debug info level 1 and the event tracer enabled.
# --show-output prints the path of the built artifact.
buck2 build //xplat/executorch/backends/vulkan/tools/gpuinfo:vulkan_gpuinfo --target-platforms=ovr_config//platform/android:arm64-fbsource --show-output -c ndk.static_linking=true -c ndk.debug_info_level=1 -c executorch.event_tracer_enabled=true
9+
# Copy the built binary to /data/local/tmp on the connected device.
# NOTE(review): the source path hard-codes a buck-out hash directory
# (f292370ff0e539e9) and a ~/fbsource checkout location; the hash presumably
# changes with the build configuration -- consider using the path printed by
# --show-output instead. The script also continues to the push even if the
# build fails, since no `set -e` is in effect.
adb push ~/fbsource/buck-out/v2/gen/fbsource/f292370ff0e539e9/xplat/executorch/backends/vulkan/tools/gpuinfo/__vulkan_gpuinfo__/vulkan_gpuinfo /data/local/tmp
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#version 450 core
10+
11+
#define PRECISION ${PRECISION}
12+
13+
layout(std430) buffer;
14+
15+
// Write-only storage buffer at set 0, binding 0, holding an unsized float
// array. PRECISION is filled in from the template variable by the #define
// earlier in this file.
layout(set = 0, binding = 0) buffer PRECISION restrict writeonly Buffer {
16+
float data[];
17+
}
18+
out_buff;
19+
20+
// Workgroup dimensions come from specialization constants 0, 1 and 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
21+
22+
// Loop iteration count, supplied as specialization constant 3 (default 1).
layout(constant_id = 3) const int NITER = 1;
23+
24+
// Template for a kernel that keeps NREG float variables in use. The ${...}
// expressions are expanded at shader-generation time using the template
// variable NREG; NITER is a specialization constant.
void main() {
25+
26+
// Expands to NREG declarations:
//   float reg_data<k> = float(NITER) + <k>;   for k in [0, NREG)
${"\n".join(["float reg_data"+str(i)+" = float(NITER) + "+str(i)+";" for i in range(int(NREG))])}
27+
28+
// `i` is declared outside the loop because it is read again afterwards.
int i = 0;
29+
for (; i < NITER; ++i) {
30+
// Circular dependency chain: reg_data0 is multiplied by the last variable,
// then each reg_data<k> is multiplied by reg_data<k-1>, so every variable
// feeds the next one on every iteration.
reg_data0 *= reg_data${int(NREG)-1};
31+
${"\n".join(["reg_data"+str(i)+" *= reg_data"+str(i-1)+";" for i in range(1, int(NREG))])}
32+
}
33+
// For any non-negative 32-bit i this shift yields 0, so every store below
// targets index 0. NOTE(review): presumably the index is derived from the
// loop counter so the stores (and the variables feeding them) depend on a
// run-time value and are not optimized away -- confirm intent.
i = i >> 31;
34+
35+
// Expands to NREG stores: out_buff.data[<k> * i] = reg_data<k>;
${"\n".join(["out_buff.data["+str(i)+" * i] = reg_data"+str(i)+";" for i in range(int(NREG))])}
36+
}

0 commit comments

Comments
 (0)