pytorch · SS-JIA · May 20, 2025 · May 20, 2025 · May 20, 2025
@@ -612,6 +612,11 @@ void ComputeGraph::prepare() {
   if (config_.enable_querypool) {
     context_->initialize_querypool();
   }
+
+  for (SharedObject& shared_object : shared_objects_) {
+    shared_object.allocate(this);
+    shared_object.bind_users(this);
+  }
 }
 
 void ComputeGraph::encode_prepack() {
@@ -636,11 +641,6 @@ void ComputeGraph::encode_execute() {
 
   context_->cmd_reset_querypool();
 
-  for (SharedObject& shared_object : shared_objects_) {
-    shared_object.allocate(this);
-    shared_object.bind_users(this);
-  }
-
   for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
     node->encode(this);
   }

@@ -21,6 +21,7 @@
 #include <executorch/backends/vulkan/runtime/graph/containers/Value.h>
 
 #include <executorch/backends/vulkan/runtime/graph/ops/DispatchNode.h>
+#include <executorch/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.h>
 #include <executorch/backends/vulkan/runtime/graph/ops/ExecuteNode.h>
 #include <executorch/backends/vulkan/runtime/graph/ops/PrepackNode.h>
 

@@ -22,7 +22,7 @@ class ComputeGraph;
 /*
  * Represents a single shader execution op in a ML model.
  */
-class DispatchNode final : public ExecuteNode {
+class DispatchNode : public ExecuteNode {
   friend class ComputeGraph;
 
  public:
@@ -43,9 +43,9 @@ class DispatchNode final : public ExecuteNode {
   void encode(ComputeGraph* graph) override;
 
  protected:
-  const vkapi::ShaderInfo shader_;
-  const utils::uvec3 global_workgroup_size_;
-  const utils::WorkgroupSize local_workgroup_size_;
+  vkapi::ShaderInfo shader_;
+  utils::uvec3 global_workgroup_size_;
+  utils::WorkgroupSize local_workgroup_size_;
   const vkapi::ParamsBindList params_;
   const vkapi::SpecVarList spec_vars_;
   const std::vector<PushConstantDataInfo> push_constants_;

@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>
+
+namespace vkcompute {
+
+DynamicDispatchNode::DynamicDispatchNode(
+    ComputeGraph& graph,
+    const PickShaderFn& pick_shader_fn,
+    const PickGlobalFn& pick_global_wg_fn,
+    const PickLocalFn& pick_local_wg_fn,
+    const std::vector<ArgGroup>& args,
+    const vkapi::ParamsBindList& params,
+    const std::vector<PushConstantDataInfo>& push_constants,
+    const vkapi::SpecVarList& spec_vars,
+    const std::vector<ValueRef>& resize_args,
+    const ResizeFunction& resize_fn)
+    : DispatchNode(
+          graph,
+          pick_shader_fn(&graph, args, resize_args),
+          pick_global_wg_fn(&graph, args, resize_args),
+          pick_local_wg_fn(&graph, args, resize_args),
+          args,
+          params,
+          push_constants,
+          spec_vars,
+          resize_args,
+          resize_fn),
+      pick_shader_fn_(pick_shader_fn),
+      pick_global_wg_fn_(pick_global_wg_fn),
+      pick_local_wg_fn_(pick_local_wg_fn) {}
+
+void DynamicDispatchNode::encode(ComputeGraph* graph) {
+  shader_ = pick_shader_fn_(graph, args_, resize_args_);
+  global_workgroup_size_ = pick_global_wg_fn_(graph, args_, resize_args_);
+  local_workgroup_size_ =
+      utils::WorkgroupSize(pick_local_wg_fn_(graph, args_, resize_args_));
+  DispatchNode::encode(graph);
+}
+
+} // namespace vkcompute
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/backends/vulkan/runtime/api/api.h>
+
+#include <executorch/backends/vulkan/runtime/graph/containers/PushConstantData.h>
+#include <executorch/backends/vulkan/runtime/graph/containers/Value.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ops/DispatchNode.h>
+
+namespace vkcompute {
+
+class ComputeGraph;
+
+/*
+ * Represents a single shader execution op in a ML model.
+ */
+class DynamicDispatchNode final : public DispatchNode {
+  friend class ComputeGraph;
+
+ public:
+  using PickShaderFn = const std::function<vkapi::ShaderInfo(
+      ComputeGraph*,
+      const std::vector<ArgGroup>&,
+      const std::vector<ValueRef>&)>;
+  using PickGlobalFn = const std::function<utils::uvec3(
+      ComputeGraph*,
+      const std::vector<ArgGroup>&,
+      const std::vector<ValueRef>&)>;
+  using PickLocalFn = const std::function<utils::uvec3(
+      ComputeGraph*,
+      const std::vector<ArgGroup>&,
+      const std::vector<ValueRef>&)>;
+
+  explicit DynamicDispatchNode(
+      ComputeGraph& graph,
+      const PickShaderFn& pick_shader_fn,
+      const PickGlobalFn& pick_global_wg_fn,
+      const PickLocalFn& pick_local_wg_fn,
+      const std::vector<ArgGroup>& args,
+      const vkapi::ParamsBindList& params,
+      const std::vector<PushConstantDataInfo>& push_constants,
+      const vkapi::SpecVarList& spec_vars,
+      const std::vector<ValueRef>& resize_args,
+      const ResizeFunction& resize_fn = nullptr);
+
+  ~DynamicDispatchNode() override = default;
+
+  void encode(ComputeGraph* graph) override;
+
+ protected:
+  const PickShaderFn pick_shader_fn_;
+  const PickGlobalFn pick_global_wg_fn_;
+  const PickLocalFn pick_local_wg_fn_;
+
+ public:
+  operator bool() const {
+    return shader_;
+  }
+};
+
+} // namespace vkcompute
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#version 450 core
+
+#define PRECISION ${PRECISION}
+
+layout(std430) buffer;
+
+${layout_declare_tensor(0, "w", "t_out", "float", "texture3d")}
+${layout_declare_tensor(1, "r", "t_in1", "float", "texture3d")}
+${layout_declare_tensor(2, "r", "t_in2", "float", "texture3d")}
+
+layout(push_constant) uniform restrict Block {
+  ivec4 out_sizes;
+  ivec4 in1_sizes;
+  ivec4 in2_sizes;
+};
+
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+void main() {
+  const ivec3 pos = ivec3(gl_GlobalInvocationID);
+
+  if (any(greaterThanEqual(pos, out_sizes.xyz))) {
+    return;
+  }
+
+
+  vec4 out_texel = vec4(0.0);
+  for (int row = 0; row < in1_sizes.y; ++row) {
+    ivec3 in_pos = ivec3(pos.x, row, pos.z);
+    vec4 in1_texel = texelFetch(t_in1, in_pos, 0);
+    vec4 in2_texel = texelFetch(t_in2, in_pos, 0);
+
+    out_texel += in1_texel * in2_texel;
+  }
+
+  imageStore(t_out, pos, out_texel + ${OFFSET});
+}
@@ -0,0 +1,7 @@
+dynamic_dispatch_test:
+  parameter_names_with_default_values:
+    OFFSET: 2.25
+  shader_variants:
+    - NAME: dynamic_dispatch_test_var1
+    - NAME: dynamic_dispatch_test_var2
+      OFFSET: 5.5
@@ -537,6 +537,59 @@ void execute_graph_and_check_output(
   }
 }
 
+vkcompute::ComputeGraph build_mm_graph(
+    int B,
+    int M,
+    int K,
+    int N,
+    vkcompute::vkapi::ScalarType dtype,
+    vkcompute::utils::StorageType in_out_stype,
+    vkcompute::utils::GPUMemoryLayout memory_layout,
+    const bool prepack_mat2,
+    const float mat2_val) {
+  using namespace vkcompute;
+  GraphConfig config;
+  ComputeGraph graph(config);
+
+  std::vector<int64_t> mat1_size = {M, K};
+  std::vector<int64_t> mat2_size = {K, N};
+  std::vector<int64_t> out_size = {M, N};
+  if (B > 1) {
+    mat1_size.resize(3);
+    mat1_size = {B, M, K};
+    mat2_size.resize(3);
+    mat2_size = {B, K, N};
+    out_size.resize(3);
+    out_size = {B, M, N};
+  }
+
+  IOValueRef mat1 =
+      graph.add_input_tensor(mat1_size, dtype, in_out_stype, memory_layout);
+  IOValueRef mat2{};
+
+  CREATE_RAND_WEIGHT_TENSOR(mat2_w, mat2_size, dtype);
+  if (mat2_val != 0.0f) {
+    std::fill(data_mat2_w.begin(), data_mat2_w.end(), mat2_val);
+  }
+
+  if (prepack_mat2) {
+    mat2.value = mat2_w;
+  } else {
+    mat2.value =
+        graph.add_tensor(mat2_size, dtype, in_out_stype, memory_layout);
+    mat2.staging = graph.set_input_tensor(mat2.value);
+  }
+
+  IOValueRef out;
+  out.value = graph.add_tensor(out_size, dtype, in_out_stype, memory_layout);
+
+  VK_GET_OP_FN("aten.mm.default")(graph, {mat1.value, mat2.value, out.value});
+
+  out.staging = graph.set_output_tensor(out.value);
+
+  return graph;
+}
+
 bool check_close(float a, float b, float atol, float rtol) {
   float max = std::max(std::abs(a), std::abs(b));
   float diff = std::abs(a - b);

@@ -8,6 +8,8 @@
 
 #pragma once
 
+#include <random>
+
 #include <gtest/gtest.h>
 
 #include <executorch/backends/vulkan/runtime/api/api.h>
@@ -16,6 +18,8 @@
 #include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>
 #include <executorch/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h>
 
+#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>
+
 #define CREATE_FLOAT_TEXTURE(sizes, allocate_memory)  \
   vkcompute::api::vTensor(                            \
       vkcompute::api::context(),                      \
@@ -135,6 +139,22 @@ void record_matmul_texture3d(
 // Input & Output Utilities
 //
 
+inline std::vector<float> create_random_float_vector(
+    const size_t numel,
+    const float min = 0.0f,
+    const float max = 1.0f) {
+  std::vector<float> result(numel);
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_real_distribution<float> dis(min, max);
+
+  for (size_t i = 0; i < numel; ++i) {
+    result[i] = dis(gen);
+  }
+
+  return result;
+}
+
 inline void fill_staging(
     vkcompute::api::StagingBuffer& staging,
     float val,
@@ -232,6 +252,22 @@ void execute_graph_and_check_output(
     std::vector<float> input_vals,
     std::vector<float> expected_outputs);
 
+#define CREATE_RAND_WEIGHT_TENSOR(name, sizes, dtype)              \
+  std::vector<float> data_##name =                                 \
+      create_random_float_buffer(utils::multiply_integers(sizes)); \
+  ValueRef name = graph.add_tensorref(sizes, dtype, data_##name.data());
+
+vkcompute::ComputeGraph build_mm_graph(
+    int B,
+    int M,
+    int K,
+    int N,
+    vkcompute::vkapi::ScalarType dtype,
+    vkcompute::utils::StorageType in_out_stype,
+    vkcompute::utils::GPUMemoryLayout memory_layout,
+    const bool prepack_mat2 = false,
+    const float mat2_val = 0.0f);
+
 //
 // Debugging Utilities
 //