Skip to content

[ET-VK] Replace Uniform buffers with push constants for binary op #7346

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions backends/vulkan/runtime/graph/ops/DispatchNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,15 @@ class PushConstantDataInfo {
payload_.attr = attr;
}

explicit PushConstantDataInfo(const void* data, uint32_t dataLen)
explicit PushConstantDataInfo(
const void* data,
uint32_t dataLen,
uint32_t pushConstantLen = 0)
: tensorUniformData(nullptr) {
VK_CHECK_COND(
dataLen <= 16, "Single push constant data size must be <= 16 bytes");
payload_.dataSize = dataLen;
memcpy(payload_.data, data, payload_.dataSize);
payload_.dataSize = pushConstantLen ? pushConstantLen : dataLen;
memcpy(payload_.data, data, dataLen);
}

/*
Expand Down
13 changes: 8 additions & 5 deletions backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,6 @@ layout(std430) buffer;
${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
${layout_declare_tensor(B, "r", "t_other", DTYPE, STORAGE)}
${layout_declare_ubo(B, "ivec4", "out_sizes")}
${layout_declare_ubo(B, "ivec4", "in_sizes")}
${layout_declare_ubo(B, "ivec4", "other_sizes")}
${layout_declare_ubo(B, "ivec2", "broadcast_params")}
${layout_declare_ubo(B, "float", "alpha")}

#include "broadcasting_utils.h"
#include "indexing_utils.h"
Expand All @@ -40,6 +35,14 @@ const lowp ivec4 in_axis_map = unhash_axis_map(in_layout);
${layout_declare_spec_const(C, "int", "other_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 other_axis_map = unhash_axis_map(other_layout);

// Push constant block holding the per-dispatch parameters of the binary op
// shader.
// NOTE(review): the member order presumably has to match the order in which
// the host supplies the push constants (out sizes, in sizes, other sizes, then
// one entry carrying broadcast_params followed by alpha) - confirm against the
// host-side dispatch code.
layout(push_constant) uniform restrict Block {
// Sizes of the output tensor; main() passes this to lpos_to_tidx().
ivec4 out_sizes;
// Sizes of the first input tensor (t_in).
ivec4 in_sizes;
// Sizes of the second input tensor (t_other).
ivec4 other_sizes;
// Broadcasting parameters for the two inputs.
ivec2 broadcast_params;
// Scalar alpha parameter of the binary op.
float alpha;
};

void main() {
const ivec3 lpos = ivec3(gl_GlobalInvocationID);
const ivec4 tidx = lpos_to_tidx(lpos, out_sizes, out_axis_map.w, packed_dim);
Expand Down
17 changes: 10 additions & 7 deletions backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,10 @@ void add_binary_op_node(
alpha_val = graph.extract_scalar<float>(alpha);
}

const utils::ivec2 broadcast_params = create_broadcast_params(*t_in1, *t_in2);
// Bundles the broadcast parameters and alpha into one contiguous object so
// that both can be handed to PushConstantDataInfo as a single push constant
// entry (pointer + sizeof). PushConstantDataInfo rejects entries larger than
// 16 bytes, so this struct has to stay within that limit.
// NOTE(review): the field order presumably mirrors the trailing
// `ivec2 broadcast_params; float alpha;` members of the shader's push constant
// block - confirm before reordering or adding fields.
const struct BinaryOpsParams {
// Result of create_broadcast_params() for the two inputs.
const utils::ivec2 broadcast_params;
// Alpha value passed to the shader.
const float alpha_val;
} binary_ops_params{create_broadcast_params(*t_in1, *t_in2), alpha_val};

std::string kernel_name("binary_");
kernel_name.reserve(kShaderNameReserve);
Expand All @@ -83,16 +86,16 @@ void add_binary_op_node(
{{out, vkapi::MemoryAccessType::WRITE},
{{arg1, arg2}, vkapi::MemoryAccessType::READ}},
// Shader params buffers
{t_out->sizes_ubo(),
t_in1->sizes_ubo(),
t_in2->sizes_ubo(),
graph.create_params_buffer(broadcast_params),
graph.create_params_buffer(alpha_val)},
{},
// Specialization Constants
{t_out->hashed_layout(), t_in1->hashed_layout(), t_in2->hashed_layout()},
// Resizing Logic
resize_binary_op_node,
{}));
{},
{{graph.sizes_pc_of(out),
graph.sizes_pc_of(arg1),
graph.sizes_pc_of(arg2),
PushConstantDataInfo(&binary_ops_params, sizeof(binary_ops_params))}}));
}

#define DEFINE_BINARY_OP_WITH_ALPHA_FN(op_name) \
Expand Down
11 changes: 4 additions & 7 deletions backends/vulkan/test/vulkan_compute_api_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1601,9 +1601,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) {
auto addFn = VK_GET_OP_FN("aten.add.Tensor");
addFn(graph, {a.value, b.value, kDummyValueRef, c});

// +2: alpha UBO, broadcast UBO for arithmetic shader
// +1: t.sizes_ubo() for arithmetic shader output c
expected_vma_allocation_count += 3;
// no new allocations if binary op uses push constants
EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count);

IOValueRef d = graph.add_input_tensor(
Expand All @@ -1624,17 +1622,16 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) {
auto mulFn = VK_GET_OP_FN("aten.mul.Tensor");
mulFn(graph, {c, d.value, e});

// +2: alpha UBO, broadcast UBO for arithmetic shader
// +1: t.sizes_ubo() for arithmetic shader output e
expected_vma_allocation_count += 3;
// no new allocations if binary op uses push constants
EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count);

IOValueRef out = {};
out.value = e;
out.staging = graph.set_output_tensor(out.value);

// +1: staging buffer for the input tensor
// +1: staging buffer for the output tensor
expected_vma_allocation_count += 1;
expected_vma_allocation_count += 2;
EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count);

graph.prepare();
Expand Down
Loading