Skip to content

Commit d1dab92

Browse files
Esteban Padilla Cerdio authored and facebook-github-bot committed
RegCount NITER calculation
Differential Revision: D59405012
1 parent f1e673f commit d1dab92

File tree

9 files changed

+277
-2
lines changed

9 files changed

+277
-2
lines changed

backends/vulkan/runtime/api/vk_api/QueryPool.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,14 +239,20 @@ void QueryPool::print_results() {
239239
}
240240

241241
unsigned long QueryPool::get_total_shader_ns(std::string kernel_name) {
242+
uint64_t total_ns = 0;
243+
uint32_t count = 0;
242244
for (ShaderDuration& entry : shader_durations_) {
243245
if (entry.kernel_name == kernel_name) {
244246
std::chrono::duration<size_t, std::nano> exec_duration_ns(
245247
entry.execution_duration_ns);
246-
return exec_duration_ns.count();
248+
total_ns += exec_duration_ns.count();
249+
count++;
247250
}
248251
}
249-
return 0;
252+
if (count == 0){
253+
return 0;
254+
}
255+
return total_ns / count;
250256
}
251257
} // namespace vkapi
252258
} // namespace vkcompute

backends/vulkan/runtime/api/vk_api/QueryPool.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ class QueryPool final {
103103
void print_results();
104104
unsigned long get_total_shader_ns(std::string kernel_name);
105105

106+
uint32_t max_query_count() {
107+
return config_.max_query_count;
108+
}
109+
106110
operator bool() const {
107111
return querypool_ != VK_NULL_HANDLE;
108112
}

backends/vulkan/tools/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/ArchProbe/
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/usr/bin/env bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Builds the vulkan_gpuinfo binary for Android arm64 and pushes it to
# /data/local/tmp on the device reachable through adb.

# Stop at the first failing command (including inside pipelines) so that a
# failed build never results in pushing a stale or missing binary.
set -euo pipefail

TARGET="//xplat/executorch/backends/vulkan/tools/gpuinfo:vulkan_gpuinfo"

# Ask buck2 where it placed the artifact instead of hard-coding the buck-out
# configuration hash, which differs between checkouts and build configurations.
# --show-full-output prints "<target> <absolute path>"; keep only the path.
BINARY_PATH="$(
  buck2 build "${TARGET}" \
    --target-platforms=ovr_config//platform/android:arm64-fbsource \
    --show-full-output \
    -c ndk.static_linking=true \
    -c ndk.debug_info_level=1 \
    -c executorch.event_tracer_enabled=true \
    | awk '{print $2}'
)"

adb push "${BINARY_PATH}" /data/local/tmp
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#version 450 core

#define PRECISION ${PRECISION}

#define VEC4_T ${buffer_gvec_type(DTYPE, 4)}

layout(std430) buffer;

// Write-only storage buffer; main() only ever stores into data[0][0].
layout(set = 0, binding = 0) buffer PRECISION restrict writeonly Buffer {
  VEC4_T data[];
}
buffer_in;

// Uniform parameters. `len` is declared but not read by main().
layout(set = 0, binding = 1) uniform PRECISION restrict Params {
  int len;
}
params;

// Workgroup size comes from specialization constants 0, 1 and 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

// Loop trip count, supplied as specialization constant 3 (defaults to 1).
layout(constant_id = 3) const int niter = 1;

/*
 * Squares a single float accumulator `niter` times and stores the result in
 * buffer_in.data[0][0].
 */
void main() {
  float acc = float(niter) + 0;

  int iter = 0;
  while (iter < niter) {
    acc *= acc;
    ++iter;
  }

  // iter is non-negative here, so the arithmetic shift leaves 0 and the store
  // index below is always 0.
  // NOTE(review): presumably the index is derived from the loop counter so the
  // compiler cannot discard the loop -- confirm.
  iter = iter >> 31;
  buffer_in.data[0 * iter][0] = acc;
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
# Variant configuration for the reg_count shader template.
# NOTE(review): nesting reconstructed from the key names; the flattened form
# repeated DTYPE/STORAGE at a single level -- confirm against the original file.
reg_count:
  # Template parameters and the value each takes when a variant does not
  # override it.
  parameter_names_with_default_values:
    DTYPE: float
    STORAGE: buffer
  # Values enumerated for each parameter.
  generate_variant_forall:
    DTYPE:
      - VALUE: float
    STORAGE:
      - VALUE: buffer
  shader_variants:
    - NAME: reg_count
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/backends/vulkan/runtime/api/api.h>
10+
11+
using namespace vkcompute;
12+
using namespace api;
13+
14+
#define QP context()->querypool()
15+
16+
auto benchmark_on_gpu(
17+
std::string shader_id,
18+
uint32_t niter,
19+
std::function<void()> run_kernel) {
20+
auto fence = context()->fences().get_fence();
21+
22+
// /2 because there are two timestamp queries per kernel
23+
uint32_t max_query_count = QP.max_query_count() / 2;
24+
uint32_t runs = std::min(niter, max_query_count);
25+
uint64_t count = 0;
26+
27+
for (int i = 0; i < runs; ++i) {
28+
run_kernel();
29+
context()->submit_cmd_to_gpu(fence.get_submit_handle());
30+
};
31+
fence.wait();
32+
QP.extract_results();
33+
count += QP.get_total_shader_ns(shader_id);
34+
QP.reset_state();
35+
context()->descriptor_pool().flush();
36+
37+
return count / 1000.f;
38+
}
39+
40+
/*
 * Searches for an iteration count large enough that one call to `run` takes at
 * least (99% of) `min_time_us`.
 *
 * `niter` is reset to 100 and then rescaled by min_time_us / measured_time
 * after each measurement, for at most 100 attempts. `run` is expected to read
 * `niter` (it is passed by reference precisely so the callback can observe
 * the updated value) and return the measured time in microseconds.
 *
 * @param min_time_us Target minimum duration of one `run` call, in us.
 * @param niter       Output: the iteration count that was settled on.
 * @param run         Callback that performs a measurement and returns its time.
 *
 * If a measurement is not strictly positive (for example 0 because nothing
 * was timed, or NaN), no scale factor can be derived; the search stops and
 * `niter` keeps its current value instead of dividing by zero. Scaled values
 * that do not fit in uint32_t saturate at UINT32_MAX, because converting an
 * out-of-range double to an unsigned integer is undefined behaviour.
 */
inline void ensure_min_niter(
    double min_time_us,
    uint32_t& niter,
    std::function<double()> run) {
  const uint32_t DEFAULT_NITER = 100;
  const double MAX_NITER = static_cast<double>(UINT32_MAX);

  niter = DEFAULT_NITER;
  for (uint32_t i = 0; i < 100; ++i) {
    const double t = run();
    if (t > min_time_us * 0.99) {
      return;
    }
    // Written as !(t > 0) so that NaN is rejected as well.
    if (!(t > 0.0)) {
      return;
    }
    const double scaled = niter * min_time_us / t;
    // `scaled < MAX_NITER` is false for NaN and +inf, which then saturate.
    niter = (scaled < MAX_NITER) ? static_cast<uint32_t>(scaled) : UINT32_MAX;
  }
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
10+
#include <executorch/backends/vulkan/runtime/api/api.h>
11+
#include <executorch/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h>
12+
#include <iostream>
13+
14+
#include "utils.h"
15+
16+
void reg_count() {
17+
const uint32_t NREG_MIN = 1;
18+
19+
uint32_t NITER;
20+
21+
auto bench = [&](uint32_t nthread, uint32_t ngrp, uint32_t nreg) {
22+
size_t len = sizeof(float);
23+
StorageBuffer buffer(context(), vkapi::kFloat, len);
24+
ParamsBuffer params(context(), int32_t(len));
25+
vkapi::PipelineBarrier pipeline_barrier{};
26+
27+
auto time = benchmark_on_gpu("reg_count_float_buffer", NITER, [&]() {
28+
context()->submit_compute_job(
29+
VK_KERNEL(reg_count_float_buffer),
30+
pipeline_barrier,
31+
{nthread, ngrp, 1},
32+
{nthread, 1, 1},
33+
{SV(NITER)},
34+
VK_NULL_HANDLE,
35+
0,
36+
buffer.buffer(),
37+
params.buffer());
38+
});
39+
return time;
40+
};
41+
42+
ensure_min_niter(1000, NITER, [&]() { return bench(1, 1, NREG_MIN); });
43+
std::cout << "NITER: " << NITER << std::endl;
44+
}
45+
46+
// Entry point: initializes the context's query pool, then runs the reg_count
// probe. Command-line arguments are not used.
int main(int argc, const char** argv) {
  context()->initialize_querypool();
  reg_count();
  return 0;
}
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
2+
load(
3+
"@fbsource//tools/build_defs:platform_defs.bzl",
4+
"ANDROID",
5+
)
6+
7+
def get_vulkan_compiler_flags():
    """Returns the warning-suppression flags used to compile the shader library."""
    flags = [
        "-Wno-missing-prototypes",
        "-Wno-global-constructors",
    ]
    return flags
9+
10+
def vulkan_spv_shader_lib(name, spv_filegroups, is_fbcode = False):
    """Defines a genrule that generates a shader .cpp and a library wrapping it.

    Args:
        name: Name of the resulting cxx_library; also used to derive the
            genrule name ("gen_<name>_cpp") and its "<name>.cpp" output.
        spv_filegroups: Dict mapping a filegroup target to the sub-path inside
            it that holds the GLSL sources.
        is_fbcode: Selects which glslc target is passed to the generator.
    """
    gen_vulkan_spv_target = "//executorch/backends/vulkan:gen_vulkan_spv_bin"

    if is_fbcode:
        glslc_path = "//caffe2/fb/vulkan/tools:glslc"
    else:
        glslc_path = "//caffe2/fb/vulkan/dotslash:glslc"

    # TODO(ssjia): remove the need for subpath once subdir_glob is enabled in OSS
    glsl_paths = [
        "$(location {})/{}".format(target, subpath)
        for target, subpath in spv_filegroups.items()
    ]

    genrule_cmd = " ".join([
        "$(exe {})".format(gen_vulkan_spv_target),
        "--glsl-paths {}".format(" ".join(glsl_paths)),
        "--output-path $OUT",
        "--glslc-path=$(exe {})".format(glslc_path),
        "--tmp-dir-path=$OUT",
    ])

    genrule_name = "gen_{}_cpp".format(name)
    generated_cpp = "{}.cpp".format(name)

    runtime.genrule(
        name = genrule_name,
        outs = {
            generated_cpp: ["spv.cpp"],
        },
        cmd = genrule_cmd,
        default_outs = ["."],
        labels = ["uses_dotslash"],
    )

    runtime.cxx_library(
        name = name,
        srcs = [
            ":{}[{}]".format(genrule_name, generated_cpp),
        ],
        compiler_flags = get_vulkan_compiler_flags(),
        define_static_target = False,
        # Static initialization is used to register shaders to the global shader registry,
        # therefore link_whole must be True to make sure unused symbols are not discarded.
        # @lint-ignore BUCKLINT: Avoid `link_whole=True`
        link_whole = True,
        # Define a soname that can be used for dynamic loading in Java, Python, etc.
        soname = "lib{}.$(ext)".format(name),
        exported_deps = [
            "//executorch/backends/vulkan:vulkan_compute_api",
        ],
    )
58+
59+
60+
61+
def define_targets():
    """Declares the gpuinfo shader filegroup, its shader library and the
    Android-only vulkan_gpuinfo binary."""

    # Every file under glsl/ is handed to the shader generator.
    runtime.filegroup(
        name = "gpuinfo_shaders",
        srcs = glob(["glsl/*"]),
    )

    vulkan_spv_shader_lib(
        name = "gpuinfo_shader_lib",
        spv_filegroups = {
            ":gpuinfo_shaders": "glsl",
        },
    )

    # The same header glob feeds both `headers` and `raw_headers`.
    header_pattern = ["**/*.h"]

    runtime.cxx_binary(
        name = "vulkan_gpuinfo",
        srcs = glob(["**/*.cpp"]),
        headers = glob(header_pattern),
        header_namespace = "/include",
        include_directories = ["/include"],
        platforms = ANDROID,
        raw_headers = glob(header_pattern),
        deps = [
            ":gpuinfo_shader_lib",
            "//executorch/backends/vulkan:vulkan_graph_runtime",
        ],
    )

0 commit comments

Comments
 (0)