Skip to content

Commit 23ff05e

Browse files
committed
[ET-VK][6/n] vulkan copy_command
Pull Request resolved: #3090 Some operators can be specified directly as a copy-command with offsets. This diff exposes the command thru the ComputeGraph. ghstack-source-id: 222828075 @exported-using-ghexport Differential Revision: [D56174686](https://our.internmc.facebook.com/intern/diff/D56174686/)
1 parent c8ad41c commit 23ff05e

File tree

5 files changed

+205
-5
lines changed

5 files changed

+205
-5
lines changed

backends/vulkan/runtime/api/Context.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,24 @@ void Context::register_shader_dispatch(
101101
cmd_.dispatch(effective_global_wg);
102102
}
103103

104+
105+
void Context::register_copy(
106+
PipelineBarrier& pipeline_barrier,
107+
const VulkanImage& src,
108+
const VulkanImage& dst,
109+
const api::utils::uvec3& copy_range,
110+
const api::utils::uvec3& src_offset,
111+
const api::utils::uvec3& dst_offset) {
112+
cmd_.insert_barrier(pipeline_barrier);
113+
cmd_.copy_texture_to_texture(
114+
src,
115+
dst,
116+
copy_range,
117+
src_offset,
118+
dst_offset);
119+
}
120+
121+
104122
void Context::submit_cmd_to_gpu(VkFence fence_handle, const bool final_use) {
105123
if (cmd_) {
106124
cmd_.end();

backends/vulkan/runtime/api/Context.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,14 @@ class Context final {
180180
const ShaderInfo&,
181181
const utils::uvec3&);
182182

183+
void register_copy(
184+
PipelineBarrier&,
185+
const VulkanImage& src,
186+
const VulkanImage& dst,
187+
const api::utils::uvec3& copy_range,
188+
const api::utils::uvec3& src_offset,
189+
const api::utils::uvec3& dst_offset);
190+
183191
template <class S, class D>
184192
bool submit_copy(
185193
PipelineBarrier&,

backends/vulkan/runtime/graph/ops/ExecuteNode.cpp

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,22 +33,67 @@ ExecuteNode::ExecuteNode(
3333
graph.update_descriptor_counts(shader, /*execute = */ true);
3434
}
3535

36-
void ExecuteNode::encode(ComputeGraph* graph) {
36+
ExecuteNode::ExecuteNode(
37+
ComputeGraph& graph,
38+
const ArgGroup& src,
39+
const ArgGroup& dst,
40+
const api::utils::uvec3& copy_range,
41+
const api::utils::uvec3& src_offset,
42+
const api::utils::uvec3& dst_offset)
43+
:
44+
src_(src), dst_(dst), copy_range_(copy_range),
45+
src_offset_(src_offset), dst_offset_(dst_offset) {
46+
// TODO: Update descriptor counts in graph.
47+
}
48+
49+
50+
void ExecuteNode::encode_shader(ComputeGraph* graph) {
3751
api::Context* const context = graph->context();
3852
api::PipelineBarrier pipeline_barrier{};
3953

4054
std::unique_lock<std::mutex> cmd_lock = context->dispatch_lock();
4155

4256
api::DescriptorSet descriptor_set =
43-
context->get_descriptor_set(shader_, local_workgroup_size_);
57+
context->get_descriptor_set(shader_, *local_workgroup_size_);
4458

4559
uint32_t idx = 0;
4660
idx = bind_values_to_descriptor_set(
4761
graph, args_, pipeline_barrier, descriptor_set, idx);
4862
bind_params_to_descriptor_set(params_, descriptor_set, idx);
4963

5064
context->register_shader_dispatch(
51-
descriptor_set, pipeline_barrier, shader_, global_workgroup_size_);
65+
descriptor_set, pipeline_barrier, shader_, *global_workgroup_size_);
66+
}
67+
68+
void ExecuteNode::encode_copy(ComputeGraph* graph) {
69+
api::Context* const context = graph->context();
70+
api::PipelineBarrier pipeline_barrier{};
71+
72+
vTensorPtr src_v_t = graph->get_tensor(src_->refs[0]);
73+
api::VulkanImage& src_image = src_v_t->image(
74+
pipeline_barrier,
75+
api::PipelineStage::COMPUTE, api::MemoryAccessType::READ);
76+
77+
vTensorPtr dst_v_t = graph->get_tensor(dst_->refs[0]);
78+
api::VulkanImage& dst_image = dst_v_t->image(
79+
pipeline_barrier,
80+
api::PipelineStage::COMPUTE, api::MemoryAccessType::WRITE);
81+
82+
context->register_copy(
83+
pipeline_barrier,
84+
src_image,
85+
dst_image,
86+
*copy_range_,
87+
*src_offset_,
88+
*dst_offset_);
89+
}
90+
91+
void ExecuteNode::encode(ComputeGraph* graph) {
92+
if (shader_.src_code.size > 0) {
93+
return encode_shader(graph);
94+
} else {
95+
return encode_copy(graph);
96+
}
5297
}
5398

5499
} // namespace vkcompute

backends/vulkan/runtime/graph/ops/ExecuteNode.h

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//123123
2+
13
/*
24
* Copyright (c) Meta Platforms, Inc. and affiliates.
35
* All rights reserved.
@@ -12,6 +14,8 @@
1214

1315
#include <executorch/backends/vulkan/runtime/graph/containers/Value.h>
1416

17+
#include <optional>
18+
1519
namespace vkcompute {
1620

1721
class ComputeGraph;
@@ -24,6 +28,8 @@ struct ArgGroup {
2428
ArgGroup(const ValueRef ref, const api::MemoryAccessType access)
2529
: refs{ref}, access(access) {}
2630

31+
ArgGroup(const ArgGroup& ag): refs(ag.refs), access(ag.access) {}
32+
2733
ArgGroup(
2834
const std::vector<ValueRef>& refs,
2935
const api::MemoryAccessType access)
@@ -58,6 +64,14 @@ class ExecuteNode final {
5864
const ResizeFunction& resize_fn = nullptr,
5965
const std::vector<ValueRef>& resize_args = {});
6066

67+
ExecuteNode(
68+
ComputeGraph& graph,
69+
const ArgGroup& src,
70+
const ArgGroup& dst,
71+
const api::utils::uvec3& copy_range,
72+
const api::utils::uvec3& src_offset,
73+
const api::utils::uvec3& dst_offset);
74+
6175
~ExecuteNode() = default;
6276

6377
void encode(ComputeGraph* graph);
@@ -70,12 +84,22 @@ class ExecuteNode final {
7084

7185
protected:
7286
const api::ShaderInfo shader_;
73-
const api::utils::uvec3 global_workgroup_size_;
74-
const api::utils::uvec3 local_workgroup_size_;
87+
const std::optional<api::utils::uvec3> global_workgroup_size_;
88+
const std::optional<api::utils::uvec3> local_workgroup_size_;
7589
const std::vector<ArgGroup> args_;
7690
std::vector<std::shared_ptr<api::UniformParamsBuffer>> params_;
7791
const ResizeFunction resize_fn_;
7892
const std::vector<ValueRef> resize_args_;
93+
94+
const std::optional<ArgGroup> src_;
95+
const std::optional<ArgGroup> dst_;
96+
const std::optional<api::utils::uvec3> copy_range_;
97+
const std::optional<api::utils::uvec3> src_offset_;
98+
const std::optional<api::utils::uvec3> dst_offset_;
99+
100+
private:
101+
void encode_shader(ComputeGraph *graph);
102+
void encode_copy(ComputeGraph *graph);
79103
};
80104

81105
} // namespace vkcompute

backends/vulkan/test/vulkan_compute_api_test.cpp

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,60 @@ TEST_F(VulkanComputeAPITest, texture_virtual_resize) {
445445
}
446446
}
447447

448+
TEST_F(VulkanComputeAPITest, copy_test) {
449+
std::vector<int64_t> sizes = {1, 3, 4}; // c, h, w
450+
vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
451+
api::MemoryAllocation a_mem = allocate_memory_for(a);
452+
a.image().bind_allocation(a_mem);
453+
454+
std::vector<float> data_a(a.gpu_numel());
455+
std::iota(data_a.begin(), data_a.end(), 0.0);
456+
fill_vtensor(a, data_a);
457+
458+
vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
459+
api::MemoryAllocation b_mem = allocate_memory_for(b);
460+
b.image().bind_allocation(b_mem);
461+
462+
// Force clear memory
463+
fill_vtensor(b, 0.0);
464+
465+
auto context = api::context();
466+
467+
// These operates on texture coordinate in (x, y, z), corresponds to (w, h,
468+
// packed_c).
469+
api::utils::uvec3 copy_range{2, 2, 1};
470+
api::utils::uvec3 src_offset{0, 0, 0};
471+
api::utils::uvec3 dst_offset{2, 1, 0};
472+
473+
vkcompute::api::PipelineBarrier pipeline_barrier{};
474+
475+
context->register_copy(
476+
pipeline_barrier,
477+
a.image(),
478+
b.image(),
479+
copy_range,
480+
src_offset,
481+
dst_offset
482+
);
483+
484+
submit_to_gpu();
485+
486+
// Fetch result back
487+
std::vector<float> data_out(b.gpu_numel());
488+
extract_vtensor(b, data_out);
489+
490+
// w shifted by 2, h shifted by 1.
491+
std::vector<float> expected{
492+
0, 0, 0, 0,
493+
0, 0, 0, 1,
494+
0, 0, 4, 5,
495+
};
496+
497+
for (size_t i = 0; i < expected.size(); i++) {
498+
CHECK_VALUE(data_out, i, expected[i]);
499+
}
500+
}
501+
448502
//
449503
// Compute Graph Tests
450504
//
@@ -793,6 +847,57 @@ TEST(VulkanComputeGraphTest, test_large_graph) {
793847
}
794848
}
795849

850+
851+
TEST(VulkanComputeGraphTest, test_register_copy) {
852+
GraphConfig config;
853+
ComputeGraph graph(config);
854+
855+
std::vector<int64_t> size = {1, 3, 4};
856+
857+
auto memory_layout = api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED;
858+
IOValueRef a = graph.add_input_tensor(size, api::kFloat, memory_layout);
859+
860+
IOValueRef out = {};
861+
out.value = graph.add_tensor(size, api::kFloat, memory_layout);
862+
863+
api::utils::uvec3 copy_range{2, 2, 1};
864+
api::utils::uvec3 src_offset{0, 0, 0};
865+
api::utils::uvec3 dst_offset{2, 1, 0};
866+
867+
graph.execute_nodes().emplace_back(new ExecuteNode(
868+
graph,
869+
{a.value, api::MemoryAccessType::READ},
870+
{out.value, api::MemoryAccessType::WRITE},
871+
copy_range,
872+
src_offset,
873+
dst_offset));
874+
875+
out.staging = graph.set_output_tensor(out.value);
876+
877+
graph.prepare();
878+
graph.encode_execute();
879+
880+
// The tensor region that is not within the dst_offset + copy_range region is
881+
// undefined, since they are outside the copy region. Hence we set the target
882+
// value from 1.0. In the expected value, 0.0 are the don't-care values.
883+
fill_vtensor(graph, a, 1.0, /* iota = */ true);
884+
885+
graph.execute();
886+
EXTRACT_TENSOR(out);
887+
888+
std::vector<float> expected{
889+
0, 0, 0, 0,
890+
0, 0, 1, 2,
891+
0, 0, 5, 6,
892+
};
893+
894+
for (size_t i = 0; i < expected.size(); i++) {
895+
if (expected[i] > 0){
896+
CHECK_VALUE(data_out, i, expected[i]);
897+
}
898+
}
899+
}
900+
796901
class VulkanToFromGPUShaderTest : public ::testing::Test {
797902
public:
798903
void SetUp() override {

0 commit comments

Comments
 (0)