Skip to content

[ET-VK] Introduce DynamicDispatchNode #11000

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions backends/vulkan/runtime/graph/ComputeGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,11 @@ void ComputeGraph::prepare() {
if (config_.enable_querypool) {
context_->initialize_querypool();
}

for (SharedObject& shared_object : shared_objects_) {
shared_object.allocate(this);
shared_object.bind_users(this);
}
}

void ComputeGraph::encode_prepack() {
Expand All @@ -636,11 +641,6 @@ void ComputeGraph::encode_execute() {

context_->cmd_reset_querypool();

for (SharedObject& shared_object : shared_objects_) {
shared_object.allocate(this);
shared_object.bind_users(this);
}

for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
node->encode(this);
}
Expand Down
1 change: 1 addition & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <executorch/backends/vulkan/runtime/graph/containers/Value.h>

#include <executorch/backends/vulkan/runtime/graph/ops/DispatchNode.h>
#include <executorch/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.h>
#include <executorch/backends/vulkan/runtime/graph/ops/ExecuteNode.h>
#include <executorch/backends/vulkan/runtime/graph/ops/PrepackNode.h>

Expand Down
8 changes: 4 additions & 4 deletions backends/vulkan/runtime/graph/ops/DispatchNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class ComputeGraph;
/*
* Represents a single shader execution op in a ML model.
*/
class DispatchNode final : public ExecuteNode {
class DispatchNode : public ExecuteNode {
friend class ComputeGraph;

public:
Expand All @@ -43,9 +43,9 @@ class DispatchNode final : public ExecuteNode {
void encode(ComputeGraph* graph) override;

protected:
const vkapi::ShaderInfo shader_;
const utils::uvec3 global_workgroup_size_;
const utils::WorkgroupSize local_workgroup_size_;
vkapi::ShaderInfo shader_;
utils::uvec3 global_workgroup_size_;
utils::WorkgroupSize local_workgroup_size_;
const vkapi::ParamsBindList params_;
const vkapi::SpecVarList spec_vars_;
const std::vector<PushConstantDataInfo> push_constants_;
Expand Down
49 changes: 49 additions & 0 deletions backends/vulkan/runtime/graph/ops/DynamicDispatchNode.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.h>

#include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>

namespace vkcompute {

DynamicDispatchNode::DynamicDispatchNode(
ComputeGraph& graph,
const PickShaderFn& pick_shader_fn,
const PickGlobalFn& pick_global_wg_fn,
const PickLocalFn& pick_local_wg_fn,
const std::vector<ArgGroup>& args,
const vkapi::ParamsBindList& params,
const std::vector<PushConstantDataInfo>& push_constants,
const vkapi::SpecVarList& spec_vars,
const std::vector<ValueRef>& resize_args,
const ResizeFunction& resize_fn)
: DispatchNode(
graph,
pick_shader_fn(&graph, args, resize_args),
pick_global_wg_fn(&graph, args, resize_args),
pick_local_wg_fn(&graph, args, resize_args),
args,
params,
push_constants,
spec_vars,
resize_args,
resize_fn),
pick_shader_fn_(pick_shader_fn),
pick_global_wg_fn_(pick_global_wg_fn),
pick_local_wg_fn_(pick_local_wg_fn) {}

void DynamicDispatchNode::encode(ComputeGraph* graph) {
shader_ = pick_shader_fn_(graph, args_, resize_args_);
global_workgroup_size_ = pick_global_wg_fn_(graph, args_, resize_args_);
local_workgroup_size_ =
utils::WorkgroupSize(pick_local_wg_fn_(graph, args_, resize_args_));
DispatchNode::encode(graph);
}

} // namespace vkcompute
69 changes: 69 additions & 0 deletions backends/vulkan/runtime/graph/ops/DynamicDispatchNode.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <executorch/backends/vulkan/runtime/api/api.h>

#include <executorch/backends/vulkan/runtime/graph/containers/PushConstantData.h>
#include <executorch/backends/vulkan/runtime/graph/containers/Value.h>

#include <executorch/backends/vulkan/runtime/graph/ops/DispatchNode.h>

namespace vkcompute {

class ComputeGraph;

/*
* Represents a single shader execution op in a ML model.
*/
class DynamicDispatchNode final : public DispatchNode {
friend class ComputeGraph;

public:
using PickShaderFn = const std::function<vkapi::ShaderInfo(
ComputeGraph*,
const std::vector<ArgGroup>&,
const std::vector<ValueRef>&)>;
using PickGlobalFn = const std::function<utils::uvec3(
ComputeGraph*,
const std::vector<ArgGroup>&,
const std::vector<ValueRef>&)>;
using PickLocalFn = const std::function<utils::uvec3(
ComputeGraph*,
const std::vector<ArgGroup>&,
const std::vector<ValueRef>&)>;

explicit DynamicDispatchNode(
ComputeGraph& graph,
const PickShaderFn& pick_shader_fn,
const PickGlobalFn& pick_global_wg_fn,
const PickLocalFn& pick_local_wg_fn,
const std::vector<ArgGroup>& args,
const vkapi::ParamsBindList& params,
const std::vector<PushConstantDataInfo>& push_constants,
const vkapi::SpecVarList& spec_vars,
const std::vector<ValueRef>& resize_args,
const ResizeFunction& resize_fn = nullptr);

~DynamicDispatchNode() override = default;

void encode(ComputeGraph* graph) override;

protected:
const PickShaderFn pick_shader_fn_;
const PickGlobalFn pick_global_wg_fn_;
const PickLocalFn pick_local_wg_fn_;

public:
operator bool() const {
return shader_;
}
};

} // namespace vkcompute
45 changes: 45 additions & 0 deletions backends/vulkan/test/glsl/dynamic_dispatch_test.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#version 450 core

#define PRECISION ${PRECISION}

layout(std430) buffer;

${layout_declare_tensor(0, "w", "t_out", "float", "texture3d")}
${layout_declare_tensor(1, "r", "t_in1", "float", "texture3d")}
${layout_declare_tensor(2, "r", "t_in2", "float", "texture3d")}

layout(push_constant) uniform restrict Block {
ivec4 out_sizes;
ivec4 in1_sizes;
ivec4 in2_sizes;
};

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

void main() {
const ivec3 pos = ivec3(gl_GlobalInvocationID);

if (any(greaterThanEqual(pos, out_sizes.xyz))) {
return;
}


vec4 out_texel = vec4(0.0);
for (int row = 0; row < in1_sizes.y; ++row) {
ivec3 in_pos = ivec3(pos.x, row, pos.z);
vec4 in1_texel = texelFetch(t_in1, in_pos, 0);
vec4 in2_texel = texelFetch(t_in2, in_pos, 0);

out_texel += in1_texel * in2_texel;
}

imageStore(t_out, pos, out_texel + ${OFFSET});
}
7 changes: 7 additions & 0 deletions backends/vulkan/test/glsl/dynamic_dispatch_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
dynamic_dispatch_test:
parameter_names_with_default_values:
OFFSET: 2.25
shader_variants:
- NAME: dynamic_dispatch_test_var1
- NAME: dynamic_dispatch_test_var2
OFFSET: 5.5
53 changes: 53 additions & 0 deletions backends/vulkan/test/utils/test_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,59 @@ void execute_graph_and_check_output(
}
}

vkcompute::ComputeGraph build_mm_graph(
int B,
int M,
int K,
int N,
vkcompute::vkapi::ScalarType dtype,
vkcompute::utils::StorageType in_out_stype,
vkcompute::utils::GPUMemoryLayout memory_layout,
const bool prepack_mat2,
const float mat2_val) {
using namespace vkcompute;
GraphConfig config;
ComputeGraph graph(config);

std::vector<int64_t> mat1_size = {M, K};
std::vector<int64_t> mat2_size = {K, N};
std::vector<int64_t> out_size = {M, N};
if (B > 1) {
mat1_size.resize(3);
mat1_size = {B, M, K};
mat2_size.resize(3);
mat2_size = {B, K, N};
out_size.resize(3);
out_size = {B, M, N};
}

IOValueRef mat1 =
graph.add_input_tensor(mat1_size, dtype, in_out_stype, memory_layout);
IOValueRef mat2{};

CREATE_RAND_WEIGHT_TENSOR(mat2_w, mat2_size, dtype);
if (mat2_val != 0.0f) {
std::fill(data_mat2_w.begin(), data_mat2_w.end(), mat2_val);
}

if (prepack_mat2) {
mat2.value = mat2_w;
} else {
mat2.value =
graph.add_tensor(mat2_size, dtype, in_out_stype, memory_layout);
mat2.staging = graph.set_input_tensor(mat2.value);
}

IOValueRef out;
out.value = graph.add_tensor(out_size, dtype, in_out_stype, memory_layout);

VK_GET_OP_FN("aten.mm.default")(graph, {mat1.value, mat2.value, out.value});

out.staging = graph.set_output_tensor(out.value);

return graph;
}

bool check_close(float a, float b, float atol, float rtol) {
float max = std::max(std::abs(a), std::abs(b));
float diff = std::abs(a - b);
Expand Down
36 changes: 36 additions & 0 deletions backends/vulkan/test/utils/test_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

#pragma once

#include <random>

#include <gtest/gtest.h>

#include <executorch/backends/vulkan/runtime/api/api.h>
Expand All @@ -16,6 +18,8 @@
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h>

#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>

#define CREATE_FLOAT_TEXTURE(sizes, allocate_memory) \
vkcompute::api::vTensor( \
vkcompute::api::context(), \
Expand Down Expand Up @@ -135,6 +139,22 @@ void record_matmul_texture3d(
// Input & Output Utilities
//

inline std::vector<float> create_random_float_vector(
const size_t numel,
const float min = 0.0f,
const float max = 1.0f) {
std::vector<float> result(numel);
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> dis(min, max);

for (size_t i = 0; i < numel; ++i) {
result[i] = dis(gen);
}

return result;
}

inline void fill_staging(
vkcompute::api::StagingBuffer& staging,
float val,
Expand Down Expand Up @@ -232,6 +252,22 @@ void execute_graph_and_check_output(
std::vector<float> input_vals,
std::vector<float> expected_outputs);

#define CREATE_RAND_WEIGHT_TENSOR(name, sizes, dtype) \
std::vector<float> data_##name = \
create_random_float_buffer(utils::multiply_integers(sizes)); \
ValueRef name = graph.add_tensorref(sizes, dtype, data_##name.data());

vkcompute::ComputeGraph build_mm_graph(
int B,
int M,
int K,
int N,
vkcompute::vkapi::ScalarType dtype,
vkcompute::utils::StorageType in_out_stype,
vkcompute::utils::GPUMemoryLayout memory_layout,
const bool prepack_mat2 = false,
const float mat2_val = 0.0f);

//
// Debugging Utilities
//
Expand Down
Loading
Loading