Skip to content

Commit 724a909

Browse files
committed
Update on "Let models provider their own specific special tokens"
Differential Revision: [D59651199](https://our.internmc.facebook.com/intern/diff/D59651199/) [ghstack-poisoned]
2 parents cf39cde + 4bba032 commit 724a909

File tree

15 files changed

+475
-16
lines changed

15 files changed

+475
-16
lines changed

backends/vulkan/runtime/gen_vulkan_spv.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -525,8 +525,29 @@ def generateVariantCombinations(
525525
if param_name not in exclude_params:
526526
param_values = []
527527
for value in value_list:
528-
suffix = value.get("SUFFIX", value["VALUE"])
529-
param_values.append((param_name, suffix, value["VALUE"]))
528+
if "RANGE" in value:
529+
value_range = value["RANGE"]
530+
suffix = value.get("SUFFIX", "")
531+
if isinstance(value_range, list) and len(value_range) == 2:
532+
for i in range(value_range[0], value_range[1] + 1):
533+
curr_suffix = (
534+
suffix + "_" + str(i) if suffix else str(i)
535+
)
536+
param_values.append((param_name, curr_suffix, str(i)))
537+
else:
538+
raise ValueError(
539+
f"{value['RANGE']} is not a valid range. Must be in format [start, end] (inclusive)."
540+
)
541+
542+
elif "VALUE" in value:
543+
suffix = value.get("SUFFIX", value["VALUE"])
544+
param_values.append((param_name, suffix, value["VALUE"]))
545+
546+
else:
547+
raise KeyError(
548+
"Parameter must be 'VALUE: string' or 'RANGE: [a, b]'"
549+
)
550+
530551
all_iterated_params.append(param_values)
531552

532553
return list(product(*all_iterated_params))

backends/vulkan/runtime/vk_api/QueryPool.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,5 +248,22 @@ unsigned long QueryPool::get_total_shader_ns(std::string kernel_name) {
248248
}
249249
return 0;
250250
}
251+
252+
unsigned long QueryPool::get_mean_shader_ns(std::string kernel_name) {
253+
uint64_t total_ns = 0;
254+
uint32_t count = 0;
255+
for (ShaderDuration& entry : shader_durations_) {
256+
if (entry.kernel_name == kernel_name) {
257+
std::chrono::duration<size_t, std::nano> exec_duration_ns(
258+
entry.execution_duration_ns);
259+
total_ns += exec_duration_ns.count();
260+
count++;
261+
}
262+
}
263+
if (count == 0) {
264+
return 0;
265+
}
266+
return total_ns / count;
267+
}
251268
} // namespace vkapi
252269
} // namespace vkcompute

backends/vulkan/runtime/vk_api/QueryPool.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ class QueryPool final {
102102
std::string generate_string_report();
103103
void print_results();
104104
unsigned long get_total_shader_ns(std::string kernel_name);
105+
unsigned long get_mean_shader_ns(std::string kernel_name);
105106

106107
operator bool() const {
107108
return querypool_ != VK_NULL_HANDLE;

backends/vulkan/tools/gpuinfo/TARGETS

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
load("@fbcode_macros//build_defs:native_rules.bzl", "buck_filegroup")
2+
load("@fbsource//tools/build_defs:fb_xplat_cxx_binary.bzl", "fb_xplat_cxx_binary")
3+
load(
4+
"@fbsource//tools/build_defs:platform_defs.bzl",
5+
"ANDROID",
6+
)
7+
load(
8+
"@fbsource//xplat/executorch/backends/vulkan:targets.bzl",
9+
"vulkan_spv_shader_lib",
10+
)
11+
12+
oncall("executorch")
13+
14+
buck_filegroup(
15+
name = "gpuinfo_shaders",
16+
srcs = glob([
17+
"glsl/*",
18+
]),
19+
visibility = [
20+
"PUBLIC",
21+
],
22+
)
23+
24+
vulkan_spv_shader_lib(
25+
name = "gpuinfo_shader_lib",
26+
spv_filegroups = {
27+
":gpuinfo_shaders": "glsl",
28+
},
29+
)
30+
31+
fb_xplat_cxx_binary(
32+
name = "vulkan_gpuinfo",
33+
srcs = glob([
34+
"**/*.cpp",
35+
]),
36+
headers = glob([
37+
"**/*.h",
38+
]),
39+
header_namespace = "/include",
40+
include_directories = ["/include"],
41+
platforms = ANDROID,
42+
raw_headers = glob([
43+
"**/*.h",
44+
]),
45+
deps = [
46+
":gpuinfo_shader_lib",
47+
"//executorch/backends/vulkan:vulkan_graph_runtime",
48+
],
49+
)
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#version 450 core
10+
11+
#define PRECISION ${PRECISION}
12+
13+
layout(std430) buffer;
14+
15+
layout(set = 0, binding = 0) buffer PRECISION restrict writeonly Buffer {
16+
float data[];
17+
}
18+
out_buff;
19+
20+
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
21+
22+
layout(constant_id = 3) const int NITER = 1;
23+
24+
// Entry point of the shader template. The "$for" lines are expanded by the
// shader generator once per k, so the emitted shader holds NREG float locals.
void main() {
25+
26+
// Declare reg_data0 .. reg_data(NREG-1), each seeded with float(NITER) + k.
$for k in range(int(NREG)):
27+
float reg_data${k} = float(NITER) + ${k};
28+
29+
int i = 0;
30+
// Every pass multiplies reg_data0 by the last local and each other local by
// its predecessor, so all NREG values depend on one another.
for (; i < NITER; ++i) {
31+
reg_data0 *= reg_data${int(NREG)-1};
32+
$for k in range(1, int(NREG)):
33+
reg_data${k} *= reg_data${k-1};
34+
}
35+
// For a non-negative 32-bit i this shift yields 0, so every store below
// targets out_buff.data[0]. NOTE(review): presumably done so the index is
// not a compile-time constant and all locals stay live - confirm.
i = i >> 31;
36+
37+
// Store each local to out_buff.data[k * i].
$for k in range(int(NREG)):
38+
out_buff.data[${k} * i] = reg_data${k};
39+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
reg_count:
8+
parameter_names_with_default_values:
9+
DTYPE: float
10+
STORAGE: buffer
11+
generate_variant_forall:
12+
NREG:
13+
- RANGE: [1, 512]
14+
15+
shader_variants:
16+
- NAME: reg_count
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
/*
2+
* Portions (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
/*
10+
* Code sourced from
11+
* https://github.com/microsoft/ArchProbe/blob/main/include/stats.hpp with the
12+
* following MIT license
13+
*
14+
* MIT License
15+
*
16+
* Copyright (c) Microsoft Corporation.
17+
*
18+
* Permission is hereby granted, free of charge, to any person obtaining a copy
19+
* of this software and associated documentation files (the "Software"), to
20+
* deal in the Software without restriction, including without limitation the
21+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
22+
* sell copies of the Software, and to permit persons to whom the Software is
23+
* furnished to do so, subject to the following conditions:
24+
*
25+
* The above copyright notice and this permission notice shall be included in
26+
* all copies or substantial portions of the Software.
27+
*
28+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
34+
* IN THE SOFTWARE
35+
*/
36+
37+
#pragma once
38+
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
40+
41+
// Running arithmetic mean of every value pushed so far.
template <typename T>
class AvgStats {
 public:
  using value_t = T;

  // Adds `value` to the running total and counts one more sample.
  void push(T value) {
    total_ += value;
    ++count_;
  }

  // True once at least one sample has been pushed.
  inline bool has_value() const {
    return count_ > 0;
  }

  // Mean of the samples pushed so far (total divided by sample count).
  // Check has_value() first: with no samples the divisor is zero.
  operator T() const {
    return total_ / count_;
  }

 private:
  T total_ = 0;
  uint64_t count_ = 0;
};
60+
61+
template <typename T, size_t NTap>
62+
class NTapAvgStats {
63+
std::array<double, NTap> hist_;
64+
size_t cur_idx_;
65+
bool ready_;
66+
67+
public:
68+
typedef T value_t;
69+
70+
void push(T value) {
71+
hist_[cur_idx_++] = value;
72+
if (cur_idx_ >= NTap) {
73+
cur_idx_ = 0;
74+
ready_ = true;
75+
}
76+
}
77+
inline bool has_value() const {
78+
return ready_;
79+
}
80+
operator T() const {
81+
double out = 0.0;
82+
for (double x : hist_) {
83+
out += x;
84+
}
85+
out /= NTap;
86+
return out;
87+
}
88+
};
89+
90+
template <uint32_t NTap>
91+
struct DtJumpFinder {
92+
private:
93+
NTapAvgStats<double, NTap> time_avg_;
94+
AvgStats<double> dtime_avg_;
95+
double compensation_;
96+
double threshold_;
97+
98+
public:
99+
// Compensation is a tiny additive to give on delta time so that the algorithm
100+
// works smoothly when a sequence of identical timing is ingested, which is
101+
// pretty common in our tests. Threshold is simply how many times the new
102+
// delta has to be to be recognized as a deviation.
103+
DtJumpFinder(double compensation = 0.01, double threshold = 10)
104+
: time_avg_(),
105+
dtime_avg_(),
106+
compensation_(compensation),
107+
threshold_(threshold) {}
108+
109+
// Returns true if the delta time regarding to the last data point seems
110+
// normal; returns false if it seems the new data point is too much away from
111+
// the historical records.
112+
bool push(double time) {
113+
if (time_avg_.has_value()) {
114+
double dtime = std::abs(time - time_avg_) + (compensation_ * time_avg_);
115+
if (dtime_avg_.has_value()) {
116+
double ddtime = std::abs(dtime - dtime_avg_);
117+
if (ddtime > threshold_ * dtime_avg_) {
118+
return true;
119+
}
120+
}
121+
dtime_avg_.push(dtime);
122+
}
123+
time_avg_.push(time);
124+
return false;
125+
}
126+
127+
double dtime_avg() const {
128+
return dtime_avg_;
129+
}
130+
double compensate_time() const {
131+
return compensation_ * time_avg_;
132+
}
133+
};
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once
10+
11+
#include <executorch/backends/vulkan/runtime/api/api.h>
12+
13+
using namespace vkcompute;
14+
using namespace api;
15+
16+
#define QP context()->querypool()
17+
18+
auto benchmark_on_gpu(
19+
std::string shader_id,
20+
uint32_t niter,
21+
std::function<void()> encode_kernel) {
22+
auto fence = context()->fences().get_fence();
23+
24+
for (int i = 0; i < niter; ++i) {
25+
encode_kernel();
26+
};
27+
28+
context()->submit_cmd_to_gpu(fence.get_submit_handle());
29+
fence.wait();
30+
QP.extract_results();
31+
uint64_t count = QP.get_mean_shader_ns(shader_id);
32+
QP.reset_state();
33+
context()->flush();
34+
35+
return count / 1000.f;
36+
}
37+
38+
/*
 * Calibrates `niter` so that one call to `run` takes at least ~`min_time_us`.
 *
 * `niter` is reset to 100, then `run` is invoked up to 100 times. `run` must
 * return the measured time in the same unit as `min_time_us` and is expected
 * to observe the current `niter` (it is passed by reference for that reason).
 * After each measurement below 99% of the target, `niter` is rescaled by
 * min_time_us / t.
 *
 * Calibration stops early, leaving `niter` at its current value, when the
 * measurement is not positive (zero, negative or NaN), since no scale factor
 * can be derived from it. A scaled count beyond the uint32_t range saturates
 * at UINT32_MAX instead of being cast out of range (undefined behaviour).
 *
 * Declared inline because this definition lives in a header.
 */
inline void ensure_min_niter(
    double min_time_us,
    uint32_t& niter,
    std::function<double()> run) {
  const uint32_t DEFAULT_NITER = 100;
  const uint32_t MAX_ATTEMPTS = 100;
  const double MAX_NITER = static_cast<double>(UINT32_MAX);

  niter = DEFAULT_NITER;
  for (uint32_t i = 0; i < MAX_ATTEMPTS; ++i) {
    const double t = run();
    if (t > min_time_us * 0.99) {
      return;
    }
    // Negated comparison so that NaN also lands here.
    if (!(t > 0.0)) {
      return;
    }
    const double scaled = niter * min_time_us / t;
    // NaN or negative: nothing sensible to convert.
    if (!(scaled >= 0.0)) {
      return;
    }
    niter = scaled >= MAX_NITER ? UINT32_MAX : static_cast<uint32_t>(scaled);
  }
}

0 commit comments

Comments
 (0)