Skip to content

[ET-VK][6/n] vulkan copy_command #3090

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions backends/vulkan/runtime/api/Context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,24 @@ void Context::register_shader_dispatch(
cmd_.dispatch(effective_global_wg);
}


void Context::register_copy(
PipelineBarrier& pipeline_barrier,
const VulkanImage& src,
const VulkanImage& dst,
const api::utils::uvec3& copy_range,
const api::utils::uvec3& src_offset,
const api::utils::uvec3& dst_offset) {
cmd_.insert_barrier(pipeline_barrier);
cmd_.copy_texture_to_texture(
src,
dst,
copy_range,
src_offset,
dst_offset);
}


void Context::submit_cmd_to_gpu(VkFence fence_handle, const bool final_use) {
if (cmd_) {
cmd_.end();
Expand Down
8 changes: 8 additions & 0 deletions backends/vulkan/runtime/api/Context.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,14 @@ class Context final {
const ShaderInfo&,
const utils::uvec3&);

void register_copy(
PipelineBarrier&,
const VulkanImage& src,
const VulkanImage& dst,
const api::utils::uvec3& copy_range,
const api::utils::uvec3& src_offset,
const api::utils::uvec3& dst_offset);

template <class S, class D>
bool submit_copy(
PipelineBarrier&,
Expand Down
51 changes: 48 additions & 3 deletions backends/vulkan/runtime/graph/ops/ExecuteNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,22 +33,67 @@ ExecuteNode::ExecuteNode(
graph.update_descriptor_counts(shader, /*execute = */ true);
}

void ExecuteNode::encode(ComputeGraph* graph) {
ExecuteNode::ExecuteNode(
ComputeGraph& graph,
const ArgGroup& src,
const ArgGroup& dst,
const api::utils::uvec3& copy_range,
const api::utils::uvec3& src_offset,
const api::utils::uvec3& dst_offset)
:
src_(src), dst_(dst), copy_range_(copy_range),
src_offset_(src_offset), dst_offset_(dst_offset) {
// TODO: Update descriptor counts in graph.
}


void ExecuteNode::encode_shader(ComputeGraph* graph) {
api::Context* const context = graph->context();
api::PipelineBarrier pipeline_barrier{};

std::unique_lock<std::mutex> cmd_lock = context->dispatch_lock();

api::DescriptorSet descriptor_set =
context->get_descriptor_set(shader_, local_workgroup_size_);
context->get_descriptor_set(shader_, *local_workgroup_size_);

uint32_t idx = 0;
idx = bind_values_to_descriptor_set(
graph, args_, pipeline_barrier, descriptor_set, idx);
bind_params_to_descriptor_set(params_, descriptor_set, idx);

context->register_shader_dispatch(
descriptor_set, pipeline_barrier, shader_, global_workgroup_size_);
descriptor_set, pipeline_barrier, shader_, *global_workgroup_size_);
}

void ExecuteNode::encode_copy(ComputeGraph* graph) {
api::Context* const context = graph->context();
api::PipelineBarrier pipeline_barrier{};

vTensorPtr src_v_t = graph->get_tensor(src_->refs[0]);
api::VulkanImage& src_image = src_v_t->image(
pipeline_barrier,
api::PipelineStage::COMPUTE, api::MemoryAccessType::READ);

vTensorPtr dst_v_t = graph->get_tensor(dst_->refs[0]);
api::VulkanImage& dst_image = dst_v_t->image(
pipeline_barrier,
api::PipelineStage::COMPUTE, api::MemoryAccessType::WRITE);

context->register_copy(
pipeline_barrier,
src_image,
dst_image,
*copy_range_,
*src_offset_,
*dst_offset_);
}

void ExecuteNode::encode(ComputeGraph* graph) {
if (shader_.src_code.size > 0) {
return encode_shader(graph);
} else {
return encode_copy(graph);
}
}

} // namespace vkcompute
28 changes: 26 additions & 2 deletions backends/vulkan/runtime/graph/ops/ExecuteNode.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//123123

/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
Expand All @@ -12,6 +14,8 @@

#include <executorch/backends/vulkan/runtime/graph/containers/Value.h>

#include <optional>

namespace vkcompute {

class ComputeGraph;
Expand All @@ -24,6 +28,8 @@ struct ArgGroup {
ArgGroup(const ValueRef ref, const api::MemoryAccessType access)
: refs{ref}, access(access) {}

ArgGroup(const ArgGroup& ag): refs(ag.refs), access(ag.access) {}

ArgGroup(
const std::vector<ValueRef>& refs,
const api::MemoryAccessType access)
Expand Down Expand Up @@ -58,6 +64,14 @@ class ExecuteNode final {
const ResizeFunction& resize_fn = nullptr,
const std::vector<ValueRef>& resize_args = {});

ExecuteNode(
ComputeGraph& graph,
const ArgGroup& src,
const ArgGroup& dst,
const api::utils::uvec3& copy_range,
const api::utils::uvec3& src_offset,
const api::utils::uvec3& dst_offset);

~ExecuteNode() = default;

void encode(ComputeGraph* graph);
Expand All @@ -70,12 +84,22 @@ class ExecuteNode final {

protected:
const api::ShaderInfo shader_;
const api::utils::uvec3 global_workgroup_size_;
const api::utils::uvec3 local_workgroup_size_;
const std::optional<api::utils::uvec3> global_workgroup_size_;
const std::optional<api::utils::uvec3> local_workgroup_size_;
const std::vector<ArgGroup> args_;
std::vector<std::shared_ptr<api::UniformParamsBuffer>> params_;
const ResizeFunction resize_fn_;
const std::vector<ValueRef> resize_args_;

const std::optional<ArgGroup> src_;
const std::optional<ArgGroup> dst_;
const std::optional<api::utils::uvec3> copy_range_;
const std::optional<api::utils::uvec3> src_offset_;
const std::optional<api::utils::uvec3> dst_offset_;

private:
void encode_shader(ComputeGraph *graph);
void encode_copy(ComputeGraph *graph);
};

} // namespace vkcompute
105 changes: 105 additions & 0 deletions backends/vulkan/test/vulkan_compute_api_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,60 @@ TEST_F(VulkanComputeAPITest, texture_virtual_resize) {
}
}

TEST_F(VulkanComputeAPITest, copy_test) {
std::vector<int64_t> sizes = {1, 3, 4}; // c, h, w
vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
api::MemoryAllocation a_mem = allocate_memory_for(a);
a.image().bind_allocation(a_mem);

std::vector<float> data_a(a.gpu_numel());
std::iota(data_a.begin(), data_a.end(), 0.0);
fill_vtensor(a, data_a);

vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
api::MemoryAllocation b_mem = allocate_memory_for(b);
b.image().bind_allocation(b_mem);

// Force clear memory
fill_vtensor(b, 0.0);

auto context = api::context();

// These operates on texture coordinate in (x, y, z), corresponds to (w, h,
// packed_c).
api::utils::uvec3 copy_range{2, 2, 1};
api::utils::uvec3 src_offset{0, 0, 0};
api::utils::uvec3 dst_offset{2, 1, 0};

vkcompute::api::PipelineBarrier pipeline_barrier{};

context->register_copy(
pipeline_barrier,
a.image(),
b.image(),
copy_range,
src_offset,
dst_offset
);

submit_to_gpu();

// Fetch result back
std::vector<float> data_out(b.gpu_numel());
extract_vtensor(b, data_out);

// w shifted by 2, h shifted by 1.
std::vector<float> expected{
0, 0, 0, 0,
0, 0, 0, 1,
0, 0, 4, 5,
};

for (size_t i = 0; i < expected.size(); i++) {
CHECK_VALUE(data_out, i, expected[i]);
}
}

//
// Compute Graph Tests
//
Expand Down Expand Up @@ -793,6 +847,57 @@ TEST(VulkanComputeGraphTest, test_large_graph) {
}
}


TEST(VulkanComputeGraphTest, test_register_copy) {
GraphConfig config;
ComputeGraph graph(config);

std::vector<int64_t> size = {1, 3, 4};

auto memory_layout = api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED;
IOValueRef a = graph.add_input_tensor(size, api::kFloat, memory_layout);

IOValueRef out = {};
out.value = graph.add_tensor(size, api::kFloat, memory_layout);

api::utils::uvec3 copy_range{2, 2, 1};
api::utils::uvec3 src_offset{0, 0, 0};
api::utils::uvec3 dst_offset{2, 1, 0};

graph.execute_nodes().emplace_back(new ExecuteNode(
graph,
{a.value, api::MemoryAccessType::READ},
{out.value, api::MemoryAccessType::WRITE},
copy_range,
src_offset,
dst_offset));

out.staging = graph.set_output_tensor(out.value);

graph.prepare();
graph.encode_execute();

// The tensor region that is not within the dst_offset + copy_range region is
// undefined, since they are outside the copy region. Hence we set the target
// value from 1.0. In the expected value, 0.0 are the don't-care values.
fill_vtensor(graph, a, 1.0, /* iota = */ true);

graph.execute();
EXTRACT_TENSOR(out);

std::vector<float> expected{
0, 0, 0, 0,
0, 0, 1, 2,
0, 0, 5, 6,
};

for (size_t i = 0; i < expected.size(); i++) {
if (expected[i] > 0){
CHECK_VALUE(data_out, i, expected[i]);
}
}
}

class VulkanToFromGPUShaderTest : public ::testing::Test {
public:
void SetUp() override {
Expand Down