Skip to content

[ET-VK] Replace Uniform buffers with push constants for copy op #7267

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
14 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 11 additions & 12 deletions backends/vulkan/runtime/graph/ops/glsl/copy_channel_offset.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,16 @@ ${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_tensor(B, "r", "existing_out", DTYPE, STORAGE)}
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}

${layout_declare_ubo(B, "ivec4", "out_sizes")}
${layout_declare_ubo(B, "ivec4", "in_sizes")}

layout(set = 0, binding = 5) uniform PRECISION restrict CopyArgs {
layout(push_constant) uniform restrict Block {
ivec4 out_sizes;
ivec4 in_sizes;
// Operates on (x, y, z) logical extents.
ivec3 range;
// channel_range is stored in range.w
ivec4 range;
// Analogus to range variable in copy. It defines the # of channel being
// copied.
int channel_range;
ivec3 dst_offset;
int dst_channel_offset;
// dst channel offset is stored in dst_offset.w
ivec4 dst_offset;
int src_channel_offset;
};

Expand All @@ -47,11 +46,11 @@ void main() {
// Note: Unlike other shaders, the range is often not equal to the destination
// texture extent.
const ivec3 lpos = ivec3(gl_GlobalInvocationID);
if (any(greaterThanEqual(lpos, range))) {
if (any(greaterThanEqual(lpos, range.xyz))) {
return;
}

const ivec3 out_lpos = lpos + dst_offset;
const ivec3 out_lpos = lpos + dst_offset.xyz;

const ivec4 out_tidx = lpos_to_tidx(out_lpos, out_sizes, out_axis_map.w, packed_dim);

Expand All @@ -61,12 +60,12 @@ void main() {
ivec4 in_tidx = out_tidx;
for (int i=0; i<4; i++) {

in_tidx[packed_dim] = out_tidx[packed_dim] - dst_channel_offset + i;
in_tidx[packed_dim] = out_tidx[packed_dim] - dst_offset.w + i;

// Handle the partial update for begining of channel in an existing tensor.
// If the source channel index is below zero or exceeds the range, we skip
// updating the element to avoid overwriting existing data.
if ((in_tidx[packed_dim] < 0) || (in_tidx[packed_dim] >= channel_range)) {
if ((in_tidx[packed_dim] < 0) || (in_tidx[packed_dim] >= range.w)) {
continue;
}

Expand Down
6 changes: 5 additions & 1 deletion backends/vulkan/runtime/graph/ops/glsl/copy_offset.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ layout(std430) buffer;
${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}

${layout_declare_ubo(B, "ivec3", "range", "ivec3", "src_offset", "ivec3", "dst_offset")}
layout(push_constant) uniform restrict Block {
ivec3 range;
ivec3 src_offset;
ivec3 dst_offset;
};

#include "indexing_utils.h"

Expand Down
70 changes: 33 additions & 37 deletions backends/vulkan/runtime/graph/ops/impl/Copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,6 @@ void add_copy_offset_node(
add_dtype_suffix(kernel_name, *t_out);
add_storage_type_suffix(kernel_name, *t_out);

const struct Block final {
alignas(16) ivec3 range;
alignas(16) ivec3 src_offset;
alignas(16) ivec3 dst_offset;
} offset_params{
range,
src_offset,
dst_offset,
};

auto shader = VK_KERNEL_FROM_STR(kernel_name);

graph.execute_nodes().emplace_back(new DispatchNode(
Expand All @@ -56,11 +46,18 @@ void add_copy_offset_node(
{in, vkapi::kRead},
},
// Parameter buffers
{
graph.create_params_buffer(offset_params),
},
{},
// Specialization Constants
{graph.hashed_layout_of(out), graph.hashed_layout_of(in)}));
{graph.hashed_layout_of(out), graph.hashed_layout_of(in)},
nullptr,
{},
{
PushConstantDataInfo(&range, sizeof(range), sizeof(utils::ivec4)),
PushConstantDataInfo(
&src_offset, sizeof(src_offset), sizeof(utils::ivec4)),
PushConstantDataInfo(
&dst_offset, sizeof(dst_offset), sizeof(utils::ivec4)),
}));
}

void add_copy_channel_offset_node(
Expand Down Expand Up @@ -128,28 +125,23 @@ void add_copy_channel_offset_node(
// The shader combines the global invocation id and the dst_offset to get
// the actual coordinate.

ivec3 dst_offset{
const ivec3 dst_offset{
0, 0, dst_first_z + batch_idx * utils::div_up_4(out_channels)};

uvec3 global_size{
const uvec3 global_size{
utils::safe_downcast<uint32_t>(dim_at<kWidth4D>(in_sizes)),
utils::safe_downcast<uint32_t>(dim_at<kHeight4D>(in_sizes)),
utils::safe_downcast<uint32_t>(dst_last_z - dst_first_z + 1)};
uvec3 local_size = graph.create_local_wg_size(global_size);

const struct Block final {
ivec3 range;
int32_t channel_range;
ivec3 dst_offset;
int32_t dst_channel_offset;
int32_t src_channel_offset;
} channel_offset_params{
utils::make_ivec3(global_size),
channel_range,
dst_offset,
dst_channel_offset,
src_channel_offset,
};
const uvec3 local_size = graph.create_local_wg_size(global_size);

const utils::ivec4 range_params = {
static_cast<int>(global_size[0]),
static_cast<int>(global_size[1]),
static_cast<int>(global_size[2]),
channel_range};

const utils::ivec4 offset_params = {
dst_offset[0], dst_offset[1], dst_offset[2], dst_channel_offset};

auto shader = VK_KERNEL_FROM_STR(kernel_name);

Expand All @@ -165,13 +157,17 @@ void add_copy_channel_offset_node(
{in, vkapi::MemoryAccessType::READ},
},
// Parameter buffers
{
t_out->sizes_ubo(),
t_in->sizes_ubo(),
graph.create_params_buffer(channel_offset_params),
},
{},
// Specialization Constants
{graph.hashed_layout_of(out), graph.hashed_layout_of(in)}));
{graph.hashed_layout_of(out), graph.hashed_layout_of(in)},
nullptr,
{},
{graph.sizes_pc_of(out),
graph.sizes_pc_of(in),
PushConstantDataInfo(&range_params, sizeof(range_params)),
PushConstantDataInfo(&offset_params, sizeof(offset_params)),
PushConstantDataInfo(
&src_channel_offset, sizeof(src_channel_offset))}));
}
}

Expand Down