
[ET-VK][ez] Test command buffer re-encoding on resize #10978


Merged: 3 commits, May 20, 2025
10 changes: 5 additions & 5 deletions backends/vulkan/runtime/graph/ComputeGraph.cpp
@@ -612,6 +612,11 @@ void ComputeGraph::prepare() {
if (config_.enable_querypool) {
context_->initialize_querypool();
}

for (SharedObject& shared_object : shared_objects_) {
shared_object.allocate(this);
shared_object.bind_users(this);
}
}

void ComputeGraph::encode_prepack() {
@@ -636,11 +641,6 @@ void ComputeGraph::encode_execute() {

context_->cmd_reset_querypool();

for (SharedObject& shared_object : shared_objects_) {
shared_object.allocate(this);
shared_object.bind_users(this);
}

for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
node->encode(this);
}
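The effect of this change is that shared object allocation and binding now happen once in prepare(), so encode_execute() can be called again after a resize without re-allocating tensor memory. A minimal sketch of the lifecycle this enables, mirroring the new test added at the bottom of this PR (the concrete sizes are illustrative):

ComputeGraph graph = build_mm_graph(
    /*B = */ 1, /*M = */ 31, /*K = */ 127, /*N = */ 23,
    vkapi::kFloat, utils::kTexture3D, utils::kWidthPacked);

graph.prepare();        // shared objects are allocated and bound here now
graph.encode_prepack();
graph.prepack();
graph.encode_execute(); // first encoding, against the original sizes

// ... execute the graph and check outputs ...

// Resize the inputs, propagate the new sizes, then re-encode the command
// buffer; prepare() has already bound memory, so nothing is re-allocated.
graph.resize_input(0, {1, 63});
graph.resize_input(1, {63, 23});
graph.propagate_resize();
graph.encode_execute(); // second encoding, against the resized sizes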
53 changes: 53 additions & 0 deletions backends/vulkan/test/utils/test_utils.cpp
@@ -537,6 +537,59 @@ void execute_graph_and_check_output(
}
}

vkcompute::ComputeGraph build_mm_graph(
int B,
int M,
int K,
int N,
vkcompute::vkapi::ScalarType dtype,
vkcompute::utils::StorageType in_out_stype,
vkcompute::utils::GPUMemoryLayout memory_layout,
const bool prepack_mat2,
const float mat2_val) {
using namespace vkcompute;
GraphConfig config;
ComputeGraph graph(config);

std::vector<int64_t> mat1_size = {M, K};
std::vector<int64_t> mat2_size = {K, N};
std::vector<int64_t> out_size = {M, N};
if (B > 1) {
mat1_size.resize(3);
mat1_size = {B, M, K};
mat2_size.resize(3);
mat2_size = {B, K, N};
out_size.resize(3);
out_size = {B, M, N};
}

IOValueRef mat1 =
graph.add_input_tensor(mat1_size, dtype, in_out_stype, memory_layout);
IOValueRef mat2{};

CREATE_RAND_WEIGHT_TENSOR(mat2_w, mat2_size, dtype);
if (mat2_val != 0.0f) {
std::fill(data_mat2_w.begin(), data_mat2_w.end(), mat2_val);
}

if (prepack_mat2) {
mat2.value = mat2_w;
} else {
mat2.value =
graph.add_tensor(mat2_size, dtype, in_out_stype, memory_layout);
mat2.staging = graph.set_input_tensor(mat2.value);
}

IOValueRef out;
out.value = graph.add_tensor(out_size, dtype, in_out_stype, memory_layout);

VK_GET_OP_FN("aten.mm.default")(graph, {mat1.value, mat2.value, out.value});

out.staging = graph.set_output_tensor(out.value);

return graph;
}

bool check_close(float a, float b, float atol, float rtol) {
float max = std::max(std::abs(a), std::abs(b));
float diff = std::abs(a - b);
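For reference, the CREATE_RAND_WEIGHT_TENSOR(mat2_w, mat2_size, dtype) call above expands to roughly the following, per the macro definition added to test_utils.h below:

std::vector<float> data_mat2_w =
    create_random_float_buffer(utils::multiply_integers(mat2_size));
ValueRef mat2_w = graph.add_tensorref(mat2_size, dtype, data_mat2_w.data());

The backing vector gets its own named local (data_mat2_w) so the host-side weight data stays alive in the enclosing scope for as long as the graph may read it.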
36 changes: 36 additions & 0 deletions backends/vulkan/test/utils/test_utils.h
@@ -8,6 +8,8 @@

#pragma once

#include <random>

#include <gtest/gtest.h>

#include <executorch/backends/vulkan/runtime/api/api.h>
@@ -16,6 +18,8 @@
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h>

#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>

#define CREATE_FLOAT_TEXTURE(sizes, allocate_memory) \
vkcompute::api::vTensor( \
vkcompute::api::context(), \
@@ -135,6 +139,22 @@ void record_matmul_texture3d(
// Input & Output Utilities
//

inline std::vector<float> create_random_float_vector(
const size_t numel,
const float min = 0.0f,
const float max = 1.0f) {
std::vector<float> result(numel);
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> dis(min, max);

for (size_t i = 0; i < numel; ++i) {
result[i] = dis(gen);
}

return result;
}

inline void fill_staging(
vkcompute::api::StagingBuffer& staging,
float val,
@@ -232,6 +252,22 @@ void execute_graph_and_check_output(
std::vector<float> input_vals,
std::vector<float> expected_outputs);

#define CREATE_RAND_WEIGHT_TENSOR(name, sizes, dtype) \
std::vector<float> data_##name = \
create_random_float_buffer(utils::multiply_integers(sizes)); \
ValueRef name = graph.add_tensorref(sizes, dtype, data_##name.data());

vkcompute::ComputeGraph build_mm_graph(
int B,
int M,
int K,
int N,
vkcompute::vkapi::ScalarType dtype,
vkcompute::utils::StorageType in_out_stype,
vkcompute::utils::GPUMemoryLayout memory_layout,
const bool prepack_mat2 = false,
const float mat2_val = 0.0f);

//
// Debugging Utilities
//
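A quick usage sketch for the new create_random_float_vector helper (the element count is arbitrary):

// Draw 16 floats uniformly from the half-open range [-1.0f, 1.0f).
std::vector<float> rand_vals = create_random_float_vector(16, -1.0f, 1.0f);

With the bounds omitted it defaults to [0.0f, 1.0f).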
93 changes: 56 additions & 37 deletions backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -2753,43 +2753,8 @@ void test_mm(
utils::StorageType storage_type,
utils::GPUMemoryLayout memory_layout,
bool prepack = true) {
GraphConfig config;
config.set_storage_type_override(storage_type);
ComputeGraph graph(config);

std::vector<int64_t> mat1_size = {M, K};
std::vector<int64_t> mat2_size = {K, N};
std::vector<int64_t> out_size = {M, N};
if (B > 1) {
mat1_size.resize(3);
mat1_size = {B, M, K};
mat2_size.resize(3);
mat2_size = {B, K, N};
out_size.resize(3);
out_size = {B, M, N};
}

IOValueRef mat2{};

CREATE_WEIGHT_TENSOR(mat2_w, mat2_size, dtype, 2.0f);

// Build graph

IOValueRef mat1 = graph.add_input_tensor(mat1_size, dtype, memory_layout);

if (prepack) {
mat2.value = mat2_w;
} else {
mat2.value = graph.add_tensor(mat2_size, dtype, memory_layout);
mat2.staging = graph.set_input_tensor(mat2.value);
}

IOValueRef out;
out.value = graph.add_tensor(out_size, dtype, memory_layout);

VK_GET_OP_FN("aten.mm.default")(graph, {mat1.value, mat2.value, out.value});

out.staging = graph.set_output_tensor(out.value);
ComputeGraph graph = build_mm_graph(
B, M, K, N, dtype, storage_type, memory_layout, prepack, 2.0f);

graph.prepare();
graph.encode_prepack();
@@ -2855,6 +2820,60 @@ TEST(VulkanComputeGraphOpsTest, mm_smoke_test) {
#undef RUN_TESTS
}

void test_mm_with_resize_reencode(
int B,
int M,
int K,
int N,
vkapi::ScalarType dtype,
utils::StorageType storage_type,
utils::GPUMemoryLayout memory_layout) {
ASSERT_TRUE(M > 1);

ComputeGraph graph = build_mm_graph(
B, M, K, N, dtype, storage_type, memory_layout, false, 2.0f);

graph.prepare();
graph.encode_prepack();
graph.prepack();
graph.encode_execute();

for (int i = 1; i < 4; i++) {
float val_mat1 = i;
float val_mat2 = i + 1;
float val_out = K * (val_mat1 * val_mat2);
execute_graph_and_check_output(graph, {val_mat1, val_mat2}, {val_out});
}

// Switch to GEMV mode
int new_K = K / 2;
std::vector<int64_t> new_mat1_size = {1, new_K};
std::vector<int64_t> new_mat2_size = {new_K, N};
graph.resize_input(0, new_mat1_size);
graph.resize_input(1, new_mat2_size);
graph.propagate_resize();

graph.encode_execute();

for (int i = 1; i < 4; i++) {
float val_mat1 = i;
float val_mat2 = i + 1;
float val_out = new_K * (val_mat1 * val_mat2);
execute_graph_and_check_output(graph, {val_mat1, val_mat2}, {val_out});
}
}

TEST(VulkanComputeGraphOpsTest, test_graph_resize_reencode) {
test_mm_with_resize_reencode(
/*B = */ 1,
/*M = */ 31,
/*K = */ 127,
/*N = */ 23,
vkapi::kFloat,
utils::kTexture3D,
utils::kWidthPacked);
}

void test_max_pool2d(
const std::vector<int64_t>& in_size,
const int64_t base_val,