Skip to content

Commit 98f0f5b

Browse files
committed
[ET-VK][ez] Test command buffer re-encoding on resize
Pull Request resolved: #10978

## Context

Add a test where `encode_execute()` is called again after resizing model inputs and propagating the new sizes.

Currently, dynamic shapes are handled by simply updating the tensor metadata when sizes are updated. Compute shaders will perform the same computations with the updated tensor sizes/strides information. However, for some operators, different input sizes require different compute shaders in order to achieve maximum performance. One example of this is for matrix multiplication, where matrix-matrix multiplication typically uses a different algorithm than vector-matrix (or matrix-vector) multiplication.

Therefore, for some models, it would be best to trigger a re-encoding of the command buffer upon input resize, so that different compute shaders can be selected based on the current input sizes. The actual changes for enabling shader re-selection will be introduced in the next diff. This diff simply checks that command buffer re-encoding "works as advertised".

## Changes

This diff adds a test in `vulkan_compute_api_test` to check whether the ComputeGraph API can handle the `encode_execute` function being called multiple times.

ghstack-source-id: 284913868
exported-using-ghexport

Differential Revision: [D75013781](https://our.internmc.facebook.com/intern/diff/D75013781/)
1 parent 9663bfb commit 98f0f5b

File tree

4 files changed

+150
-42
lines changed

4 files changed

+150
-42
lines changed

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,11 @@ void ComputeGraph::prepare() {
612612
if (config_.enable_querypool) {
613613
context_->initialize_querypool();
614614
}
615+
616+
for (SharedObject& shared_object : shared_objects_) {
617+
shared_object.allocate(this);
618+
shared_object.bind_users(this);
619+
}
615620
}
616621

617622
void ComputeGraph::encode_prepack() {
@@ -636,11 +641,6 @@ void ComputeGraph::encode_execute() {
636641

637642
context_->cmd_reset_querypool();
638643

639-
for (SharedObject& shared_object : shared_objects_) {
640-
shared_object.allocate(this);
641-
shared_object.bind_users(this);
642-
}
643-
644644
for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
645645
node->encode(this);
646646
}

backends/vulkan/test/utils/test_utils.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,59 @@ void execute_graph_and_check_output(
537537
}
538538
}
539539

540+
vkcompute::ComputeGraph build_mm_graph(
541+
int B,
542+
int M,
543+
int K,
544+
int N,
545+
vkcompute::vkapi::ScalarType dtype,
546+
vkcompute::utils::StorageType in_out_stype,
547+
vkcompute::utils::GPUMemoryLayout memory_layout,
548+
const bool prepack_mat2,
549+
const float mat2_val) {
550+
using namespace vkcompute;
551+
GraphConfig config;
552+
ComputeGraph graph(config);
553+
554+
std::vector<int64_t> mat1_size = {M, K};
555+
std::vector<int64_t> mat2_size = {K, N};
556+
std::vector<int64_t> out_size = {M, N};
557+
if (B > 1) {
558+
mat1_size.resize(3);
559+
mat1_size = {B, M, K};
560+
mat2_size.resize(3);
561+
mat2_size = {B, K, N};
562+
out_size.resize(3);
563+
out_size = {B, M, N};
564+
}
565+
566+
IOValueRef mat1 =
567+
graph.add_input_tensor(mat1_size, dtype, in_out_stype, memory_layout);
568+
IOValueRef mat2{};
569+
570+
CREATE_RAND_WEIGHT_TENSOR(mat2_w, mat2_size, dtype);
571+
if (mat2_val != 0.0f) {
572+
std::fill(data_mat2_w.begin(), data_mat2_w.end(), mat2_val);
573+
}
574+
575+
if (prepack_mat2) {
576+
mat2.value = mat2_w;
577+
} else {
578+
mat2.value =
579+
graph.add_tensor(mat2_size, dtype, in_out_stype, memory_layout);
580+
mat2.staging = graph.set_input_tensor(mat2.value);
581+
}
582+
583+
IOValueRef out;
584+
out.value = graph.add_tensor(out_size, dtype, in_out_stype, memory_layout);
585+
586+
VK_GET_OP_FN("aten.mm.default")(graph, {mat1.value, mat2.value, out.value});
587+
588+
out.staging = graph.set_output_tensor(out.value);
589+
590+
return graph;
591+
}
592+
540593
bool check_close(float a, float b, float atol, float rtol) {
541594
float max = std::max(std::abs(a), std::abs(b));
542595
float diff = std::abs(a - b);

backends/vulkan/test/utils/test_utils.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
#pragma once
1010

11+
#include <random>
12+
1113
#include <gtest/gtest.h>
1214

1315
#include <executorch/backends/vulkan/runtime/api/api.h>
@@ -16,6 +18,8 @@
1618
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>
1719
#include <executorch/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h>
1820

21+
#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>
22+
1923
#define CREATE_FLOAT_TEXTURE(sizes, allocate_memory) \
2024
vkcompute::api::vTensor( \
2125
vkcompute::api::context(), \
@@ -135,6 +139,22 @@ void record_matmul_texture3d(
135139
// Input & Output Utilities
136140
//
137141

142+
inline std::vector<float> create_random_float_vector(
143+
const size_t numel,
144+
const float min = 0.0f,
145+
const float max = 1.0f) {
146+
std::vector<float> result(numel);
147+
std::random_device rd;
148+
std::mt19937 gen(rd());
149+
std::uniform_real_distribution<float> dis(min, max);
150+
151+
for (size_t i = 0; i < numel; ++i) {
152+
result[i] = dis(gen);
153+
}
154+
155+
return result;
156+
}
157+
138158
inline void fill_staging(
139159
vkcompute::api::StagingBuffer& staging,
140160
float val,
@@ -232,6 +252,22 @@ void execute_graph_and_check_output(
232252
std::vector<float> input_vals,
233253
std::vector<float> expected_outputs);
234254

255+
#define CREATE_RAND_WEIGHT_TENSOR(name, sizes, dtype) \
256+
std::vector<float> data_##name = \
257+
create_random_float_buffer(utils::multiply_integers(sizes)); \
258+
ValueRef name = graph.add_tensorref(sizes, dtype, data_##name.data());
259+
260+
vkcompute::ComputeGraph build_mm_graph(
261+
int B,
262+
int M,
263+
int K,
264+
int N,
265+
vkcompute::vkapi::ScalarType dtype,
266+
vkcompute::utils::StorageType in_out_stype,
267+
vkcompute::utils::GPUMemoryLayout memory_layout,
268+
const bool prepack_mat2 = false,
269+
const float mat2_val = 0.0f);
270+
235271
//
236272
// Debugging Utilities
237273
//

backends/vulkan/test/vulkan_compute_api_test.cpp

Lines changed: 56 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2753,43 +2753,8 @@ void test_mm(
27532753
utils::StorageType storage_type,
27542754
utils::GPUMemoryLayout memory_layout,
27552755
bool prepack = true) {
2756-
GraphConfig config;
2757-
config.set_storage_type_override(storage_type);
2758-
ComputeGraph graph(config);
2759-
2760-
std::vector<int64_t> mat1_size = {M, K};
2761-
std::vector<int64_t> mat2_size = {K, N};
2762-
std::vector<int64_t> out_size = {M, N};
2763-
if (B > 1) {
2764-
mat1_size.resize(3);
2765-
mat1_size = {B, M, K};
2766-
mat2_size.resize(3);
2767-
mat2_size = {B, K, N};
2768-
out_size.resize(3);
2769-
out_size = {B, M, N};
2770-
}
2771-
2772-
IOValueRef mat2{};
2773-
2774-
CREATE_WEIGHT_TENSOR(mat2_w, mat2_size, dtype, 2.0f);
2775-
2776-
// Build graph
2777-
2778-
IOValueRef mat1 = graph.add_input_tensor(mat1_size, dtype, memory_layout);
2779-
2780-
if (prepack) {
2781-
mat2.value = mat2_w;
2782-
} else {
2783-
mat2.value = graph.add_tensor(mat2_size, dtype, memory_layout);
2784-
mat2.staging = graph.set_input_tensor(mat2.value);
2785-
}
2786-
2787-
IOValueRef out;
2788-
out.value = graph.add_tensor(out_size, dtype, memory_layout);
2789-
2790-
VK_GET_OP_FN("aten.mm.default")(graph, {mat1.value, mat2.value, out.value});
2791-
2792-
out.staging = graph.set_output_tensor(out.value);
2756+
ComputeGraph graph = build_mm_graph(
2757+
B, M, K, N, dtype, storage_type, memory_layout, prepack, 2.0f);
27932758

27942759
graph.prepare();
27952760
graph.encode_prepack();
@@ -2855,6 +2820,60 @@ TEST(VulkanComputeGraphOpsTest, mm_smoke_test) {
28552820
#undef RUN_TESTS
28562821
}
28572822

2823+
void test_mm_with_resize_reencode(
2824+
int B,
2825+
int M,
2826+
int K,
2827+
int N,
2828+
vkapi::ScalarType dtype,
2829+
utils::StorageType storage_type,
2830+
utils::GPUMemoryLayout memory_layout) {
2831+
ASSERT_TRUE(M > 1);
2832+
2833+
ComputeGraph graph = build_mm_graph(
2834+
B, M, K, N, dtype, storage_type, memory_layout, false, 2.0f);
2835+
2836+
graph.prepare();
2837+
graph.encode_prepack();
2838+
graph.prepack();
2839+
graph.encode_execute();
2840+
2841+
for (int i = 1; i < 4; i++) {
2842+
float val_mat1 = i;
2843+
float val_mat2 = i + 1;
2844+
float val_out = K * (val_mat1 * val_mat2);
2845+
execute_graph_and_check_output(graph, {val_mat1, val_mat2}, {val_out});
2846+
}
2847+
2848+
// Switch to GEMV mode
2849+
int new_K = K / 2;
2850+
std::vector<int64_t> new_mat1_size = {1, new_K};
2851+
std::vector<int64_t> new_mat2_size = {new_K, N};
2852+
graph.resize_input(0, new_mat1_size);
2853+
graph.resize_input(1, new_mat2_size);
2854+
graph.propagate_resize();
2855+
2856+
graph.encode_execute();
2857+
2858+
for (int i = 1; i < 4; i++) {
2859+
float val_mat1 = i;
2860+
float val_mat2 = i + 1;
2861+
float val_out = new_K * (val_mat1 * val_mat2);
2862+
execute_graph_and_check_output(graph, {val_mat1, val_mat2}, {val_out});
2863+
}
2864+
}
2865+
2866+
TEST(VulkanComputeGraphOpsTest, test_graph_resize_reencode) {
2867+
test_mm_with_resize_reencode(
2868+
/*B = */ 1,
2869+
/*M = */ 31,
2870+
/*K = */ 127,
2871+
/*N = */ 23,
2872+
vkapi::kFloat,
2873+
utils::kTexture3D,
2874+
utils::kWidthPacked);
2875+
}
2876+
28582877
void test_max_pool2d(
28592878
const std::vector<int64_t>& in_size,
28602879
const int64_t base_val,

0 commit comments

Comments (0)