Skip to content

Commit 98f0f5b

Browse files
committed
[ET-VK][ez] Test command buffer re-encoding on resize
Pull Request resolved: #10978

## Context

Add a test where `encode_execute()` is called again after resizing model inputs and propagating the new sizes.

Currently, dynamic shapes are handled by simply updating the tensor metadata when sizes are updated. Compute shaders will perform the same computations with the updated tensor sizes/strides information. However, for some operators, different input sizes require different compute shaders in order to achieve maximum performance. One example of this is for matrix multiplication, where matrix-matrix multiplication typically uses a different algorithm than vector-matrix (or matrix-vector) multiplication.

Therefore, for some models, it would be best to trigger a re-encoding of the command buffer upon input resize, so that different compute shaders can be selected based on the current input sizes. The actual changes for enabling shader re-selection will be introduced in the next diff. This diff simply checks that command buffer re-encoding "works as advertised".

## Changes

This diff adds a test in `vulkan_compute_api_test` to check whether the ComputeGraph API can handle the `encode_execute` function being called multiple times.

ghstack-source-id: 284913868
exported-using-ghexport

Differential Revision: [D75013781](https://our.internmc.facebook.com/intern/diff/D75013781/)
1 parent 9663bfb commit 98f0f5b

File tree

4 files changed

+150
-42
lines changed

4 files changed

+150
-42
lines changed

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,11 @@ void ComputeGraph::prepare() {
612612
if (config_.enable_querypool) {
613613
context_->initialize_querypool();
614614
}
615+
616+
for (SharedObject& shared_object : shared_objects_) {
617+
shared_object.allocate(this);
618+
shared_object.bind_users(this);
619+
}
615620
}
616621

617622
void ComputeGraph::encode_prepack() {
@@ -636,11 +641,6 @@ void ComputeGraph::encode_execute() {
636641

637642
context_->cmd_reset_querypool();
638643

639-
for (SharedObject& shared_object : shared_objects_) {
640-
shared_object.allocate(this);
641-
shared_object.bind_users(this);
642-
}
643-
644644
for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
645645
node->encode(this);
646646
}

backends/vulkan/test/utils/test_utils.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,59 @@ void execute_graph_and_check_output(
537537
}
538538
}
539539

540+
vkcompute::ComputeGraph build_mm_graph(
541+
int B,
542+
int M,
543+
int K,
544+
int N,
545+
vkcompute::vkapi::ScalarType dtype,
546+
vkcompute::utils::StorageType in_out_stype,
547+
vkcompute::utils::GPUMemoryLayout memory_layout,
548+
const bool prepack_mat2,
549+
const float mat2_val) {
550+
using namespace vkcompute;
551+
GraphConfig config;
552+
ComputeGraph graph(config);
553+
554+
std::vector<int64_t> mat1_size = {M, K};
555+
std::vector<int64_t> mat2_size = {K, N};
556+
std::vector<int64_t> out_size = {M, N};
557+
if (B > 1) {
558+
mat1_size.resize(3);
559+
mat1_size = {B, M, K};
560+
mat2_size.resize(3);
561+
mat2_size = {B, K, N};
562+
out_size.resize(3);
563+
out_size = {B, M, N};
564+
}
565+
566+
IOValueRef mat1 =
567+
graph.add_input_tensor(mat1_size, dtype, in_out_stype, memory_layout);
568+
IOValueRef mat2{};
569+
570+
CREATE_RAND_WEIGHT_TENSOR(mat2_w, mat2_size, dtype);
571+
if (mat2_val != 0.0f) {
572+
std::fill(data_mat2_w.begin(), data_mat2_w.end(), mat2_val);
573+
}
574+
575+
if (prepack_mat2) {
576+
mat2.value = mat2_w;
577+
} else {
578+
mat2.value =
579+
graph.add_tensor(mat2_size, dtype, in_out_stype, memory_layout);
580+
mat2.staging = graph.set_input_tensor(mat2.value);
581+
}
582+
583+
IOValueRef out;
584+
out.value = graph.add_tensor(out_size, dtype, in_out_stype, memory_layout);
585+
586+
VK_GET_OP_FN("aten.mm.default")(graph, {mat1.value, mat2.value, out.value});
587+
588+
out.staging = graph.set_output_tensor(out.value);
589+
590+
return graph;
591+
}
592+
540593
bool check_close(float a, float b, float atol, float rtol) {
541594
float max = std::max(std::abs(a), std::abs(b));
542595
float diff = std::abs(a - b);

backends/vulkan/test/utils/test_utils.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
#pragma once
1010

11+
#include <random>
12+
1113
#include <gtest/gtest.h>
1214

1315
#include <executorch/backends/vulkan/runtime/api/api.h>
@@ -16,6 +18,8 @@
1618
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>
1719
#include <executorch/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h>
1820

21+
#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>
22+
1923
#define CREATE_FLOAT_TEXTURE(sizes, allocate_memory) \
2024
vkcompute::api::vTensor( \
2125
vkcompute::api::context(), \
@@ -135,6 +139,22 @@ void record_matmul_texture3d(
135139
// Input & Output Utilities
136140
//
137141

142+
inline std::vector<float> create_random_float_vector(
143+
const size_t numel,
144+
const float min = 0.0f,
145+
const float max = 1.0f) {
146+
std::vector<float> result(numel);
147+
std::random_device rd;
148+
std::mt19937 gen(rd());
149+
std::uniform_real_distribution<float> dis(min, max);
150+
151+
for (size_t i = 0; i < numel; ++i) {
152+
result[i] = dis(gen);
153+
}
154+
155+
return result;
156+
}
157+
138158
inline void fill_staging(
139159
vkcompute::api::StagingBuffer& staging,
140160
float val,
@@ -232,6 +252,22 @@ void execute_graph_and_check_output(
232252
std::vector<float> input_vals,
233253
std::vector<float> expected_outputs);
234254

255+
#define CREATE_RAND_WEIGHT_TENSOR(name, sizes, dtype) \
256+
std::vector<float> data_##name = \
257+
create_random_float_buffer(utils::multiply_integers(sizes)); \
258+
ValueRef name = graph.add_tensorref(sizes, dtype, data_##name.data());
259+
260+
vkcompute::ComputeGraph build_mm_graph(
261+
int B,
262+
int M,
263+
int K,
264+
int N,
265+
vkcompute::vkapi::ScalarType dtype,
266+
vkcompute::utils::StorageType in_out_stype,
267+
vkcompute::utils::GPUMemoryLayout memory_layout,
268+
const bool prepack_mat2 = false,
269+
const float mat2_val = 0.0f);
270+
235271
//
236272
// Debugging Utilities
237273
//

backends/vulkan/test/vulkan_compute_api_test.cpp

Lines changed: 56 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2753,43 +2753,8 @@ void test_mm(
27532753
utils::StorageType storage_type,
27542754
utils::GPUMemoryLayout memory_layout,
27552755
bool prepack = true) {
2756-
GraphConfig config;
2757-
config.set_storage_type_override(storage_type);
2758-
ComputeGraph graph(config);
2759-
2760-
std::vector<int64_t> mat1_size = {M, K};
2761-
std::vector<int64_t> mat2_size = {K, N};
2762-
std::vector<int64_t> out_size = {M, N};
2763-
if (B > 1) {
2764-
mat1_size.resize(3);
2765-
mat1_size = {B, M, K};
2766-
mat2_size.resize(3);
2767-
mat2_size = {B, K, N};
2768-
out_size.resize(3);
2769-
out_size = {B, M, N};
2770-
}
2771-
2772-
IOValueRef mat2{};
2773-
2774-
CREATE_WEIGHT_TENSOR(mat2_w, mat2_size, dtype, 2.0f);
2775-
2776-
// Build graph
2777-
2778-
IOValueRef mat1 = graph.add_input_tensor(mat1_size, dtype, memory_layout);
2779-
2780-
if (prepack) {
2781-
mat2.value = mat2_w;
2782-
} else {
2783-
mat2.value = graph.add_tensor(mat2_size, dtype, memory_layout);
2784-
mat2.staging = graph.set_input_tensor(mat2.value);
2785-
}
2786-
2787-
IOValueRef out;
2788-
out.value = graph.add_tensor(out_size, dtype, memory_layout);
2789-
2790-
VK_GET_OP_FN("aten.mm.default")(graph, {mat1.value, mat2.value, out.value});
2791-
2792-
out.staging = graph.set_output_tensor(out.value);
2756+
ComputeGraph graph = build_mm_graph(
2757+
B, M, K, N, dtype, storage_type, memory_layout, prepack, 2.0f);
27932758

27942759
graph.prepare();
27952760
graph.encode_prepack();
@@ -2855,6 +2820,60 @@ TEST(VulkanComputeGraphOpsTest, mm_smoke_test) {
28552820
#undef RUN_TESTS
28562821
}
28572822

2823+
void test_mm_with_resize_reencode(
2824+
int B,
2825+
int M,
2826+
int K,
2827+
int N,
2828+
vkapi::ScalarType dtype,
2829+
utils::StorageType storage_type,
2830+
utils::GPUMemoryLayout memory_layout) {
2831+
ASSERT_TRUE(M > 1);
2832+
2833+
ComputeGraph graph = build_mm_graph(
2834+
B, M, K, N, dtype, storage_type, memory_layout, false, 2.0f);
2835+
2836+
graph.prepare();
2837+
graph.encode_prepack();
2838+
graph.prepack();
2839+
graph.encode_execute();
2840+
2841+
for (int i = 1; i < 4; i++) {
2842+
float val_mat1 = i;
2843+
float val_mat2 = i + 1;
2844+
float val_out = K * (val_mat1 * val_mat2);
2845+
execute_graph_and_check_output(graph, {val_mat1, val_mat2}, {val_out});
2846+
}
2847+
2848+
// Switch to GEMV mode
2849+
int new_K = K / 2;
2850+
std::vector<int64_t> new_mat1_size = {1, new_K};
2851+
std::vector<int64_t> new_mat2_size = {new_K, N};
2852+
graph.resize_input(0, new_mat1_size);
2853+
graph.resize_input(1, new_mat2_size);
2854+
graph.propagate_resize();
2855+
2856+
graph.encode_execute();
2857+
2858+
for (int i = 1; i < 4; i++) {
2859+
float val_mat1 = i;
2860+
float val_mat2 = i + 1;
2861+
float val_out = new_K * (val_mat1 * val_mat2);
2862+
execute_graph_and_check_output(graph, {val_mat1, val_mat2}, {val_out});
2863+
}
2864+
}
2865+
2866+
TEST(VulkanComputeGraphOpsTest, test_graph_resize_reencode) {
2867+
test_mm_with_resize_reencode(
2868+
/*B = */ 1,
2869+
/*M = */ 31,
2870+
/*K = */ 127,
2871+
/*N = */ 23,
2872+
vkapi::kFloat,
2873+
utils::kTexture3D,
2874+
utils::kWidthPacked);
2875+
}
2876+
28582877
void test_max_pool2d(
28592878
const std::vector<int64_t>& in_size,
28602879
const int64_t base_val,

0 commit comments

Comments (0)