Skip to content

Commit 3d3babf

Browse files
estebanpadilla authored and
facebook-github-bot committed
RegCount NITER calculation (pytorch#4159)
Summary: Pull Request resolved: pytorch#4159 Differential Revision: D59405012
1 parent a33936b commit 3d3babf

File tree

9 files changed

+739
-2
lines changed

9 files changed

+739
-2
lines changed

backends/vulkan/runtime/api/vk_api/QueryPool.cpp

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -239,14 +239,20 @@ void QueryPool::print_results() {
239239
}
240240

241241
unsigned long QueryPool::get_total_shader_ns(std::string kernel_name) {
// Looks up every entry of shader_durations_ whose kernel_name equals the
// argument. With the added lines of this diff the function returns the
// integer mean of execution_duration_ns over all matches, or 0 when no
// entry matches.
// NOTE(review): the function name still says total although the new return
// value is total_ns / count - confirm callers expect an average.
242+
uint64_t total_ns = 0;
243+
uint32_t count = 0;
242244
for (ShaderDuration& entry : shader_durations_) {
243245
if (entry.kernel_name == kernel_name) {
244246
std::chrono::duration<size_t, std::nano> exec_duration_ns(
245247
entry.execution_duration_ns);
// The duration wrapper is only used to read the value back through count().
246-
return exec_duration_ns.count();
// Removed line: the previous version returned on the first matching entry.
248+
total_ns += exec_duration_ns.count();
249+
count++;
247250
}
248251
}
249-
return 0;
252+
if (count == 0) {
// Guard so the division below never divides by zero.
253+
return 0;
254+
}
255+
return total_ns / count;
// Integer division: any fractional nanosecond of the mean is truncated.
250256
}
251257
} // namespace vkapi
252258
} // namespace vkcompute

backends/vulkan/runtime/api/vk_api/QueryPool.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,10 @@ class QueryPool final {
103103
void print_results();
104104
unsigned long get_total_shader_ns(std::string kernel_name);
105105

106+
uint32_t max_query_count() {
// Accessor added by this diff: returns config_.max_query_count unchanged.
// NOTE(review): not declared const, unlike the operator bool() that follows
// it in this header - confirm whether that is intentional.
107+
return config_.max_query_count;
108+
}
109+
106110
operator bool() const {
107111
return querypool_ != VK_NULL_HANDLE;
108112
}

backends/vulkan/tools/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
/ArchProbe/

backends/vulkan/tools/gpuinfo/TARGETS

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,49 @@
1+
load("@fbcode_macros//build_defs:native_rules.bzl", "buck_filegroup")
2+
load("@fbsource//tools/build_defs:fb_xplat_cxx_binary.bzl", "fb_xplat_cxx_binary")
3+
load(
4+
"@fbsource//tools/build_defs:platform_defs.bzl",
5+
"ANDROID",
6+
)
7+
load(
8+
"@fbsource//xplat/executorch/backends/vulkan:targets.bzl",
9+
"vulkan_spv_shader_lib",
10+
)
11+
12+
oncall("executorch")
13+
14+
buck_filegroup(
15+
name = "gpuinfo_shaders",
16+
srcs = glob([
17+
"glsl/*",
18+
]),
19+
visibility = [
20+
"PUBLIC",
21+
],
22+
)
23+
24+
vulkan_spv_shader_lib(
25+
name = "gpuinfo_shader_lib",
26+
spv_filegroups = {
27+
":gpuinfo_shaders": "glsl",
28+
},
29+
)
30+
31+
fb_xplat_cxx_binary(
32+
name = "vulkan_gpuinfo",
33+
srcs = glob([
34+
"**/*.cpp",
35+
]),
36+
headers = glob([
37+
"**/*.h",
38+
]),
39+
header_namespace = "/include",
40+
include_directories = ["/include"],
41+
platforms = ANDROID,
42+
raw_headers = glob([
43+
"**/*.h",
44+
]),
45+
deps = [
46+
":gpuinfo_shader_lib",
47+
"//executorch/backends/vulkan:vulkan_graph_runtime",
48+
],
49+
)
Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
#!/usr/bin/env bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
buck2 build //xplat/executorch/backends/vulkan/tools/gpuinfo:vulkan_gpuinfo --target-platforms=ovr_config//platform/android:arm64-fbsource --show-output -c ndk.static_linking=true -c ndk.debug_info_level=1 -c executorch.event_tracer_enabled=true
9+
adb push ~/fbsource/buck-out/v2/gen/fbsource/f292370ff0e539e9/xplat/executorch/backends/vulkan/tools/gpuinfo/__vulkan_gpuinfo__/vulkan_gpuinfo /data/local/tmp
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#version 450 core
10+
11+
#define PRECISION ${PRECISION}
12+
13+
layout(std430) buffer;
14+
15+
layout(set = 0, binding = 0) buffer PRECISION restrict writeonly Buffer {
16+
float data[];
17+
}
18+
out_buff;
19+
20+
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
21+
22+
layout(constant_id = 3) const int NITER = 1;
23+
24+
void main() {
// Shader entry point. The dollar-brace expressions are template code that
// emits one statement per index in range(int(NREG)); presumably they are
// expanded by the shader codegen before compilation - TODO confirm.
25+
26+
${"\n".join(["float reg_data"+str(i)+" = float(NITER) + "+str(i)+";" for i in range(int(NREG))])}
// Above: declares reg_data0 .. reg_data(NREG-1), each seeded with
// float(NITER) plus its own index.
27+
28+
int i = 0;
29+
for (; i < NITER; ++i) {
// Each iteration multiplies reg_data0 by the last variable, then every
// reg_dataK by reg_data(K-1), so all NREG values feed into one another.
30+
reg_data0 *= reg_data${int(NREG)-1};
31+
${"\n".join(["reg_data"+str(i)+" *= reg_data"+str(i-1)+";" for i in range(1, int(NREG))])}
32+
}
33+
i = i >> 31;
// i is non-negative after the loop, so the shift leaves i == 0 and every
// store below indexes out_buff.data[0].
// NOTE(review): presumably the index is computed at run time so that the
// compiler cannot discard the reg_data values as unused - confirm.
34+
35+
${"\n".join(["out_buff.data["+str(i)+" * i] = reg_data"+str(i)+";" for i in range(int(NREG))])}
36+
}

0 commit comments

Comments
 (0)