Skip to content

[ET-VK][Op Redesign][5/n] Merge StagingNode into ExecuteNode #2260

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions backends/vulkan/runtime/graph/ComputeGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ ValueRef ComputeGraph::set_input_tensor(
if (use_staging) {
vTensor& tensor = get_val(idx).toTensor();
ValueRef staging_idx = add_staging(tensor.dtype(), tensor.gpu_numel());
execute_nodes_.emplace_back(new StagingNode(staging_idx, idx));
add_staging_to_tensor_node(*this, staging_idx, idx);
inputs_.push_back(staging_idx);
return staging_idx;
}
Expand All @@ -95,7 +95,7 @@ ValueRef ComputeGraph::set_output_tensor(
if (use_staging) {
vTensor& tensor = get_val(idx).toTensor();
ValueRef staging_idx = add_staging(tensor.dtype(), tensor.gpu_numel());
execute_nodes_.emplace_back(new StagingNode(idx, staging_idx));
add_tensor_to_staging_node(*this, idx, staging_idx);
outputs_.push_back(staging_idx);
return staging_idx;
}
Expand Down
23 changes: 9 additions & 14 deletions backends/vulkan/runtime/graph/ops/ExecuteNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,10 @@ class ComputeGraph;
* encoding of the shader corresponding to the op into the command buffer of a
* ComputeGraph.
*/
class ExecuteNode {
class ExecuteNode final {
friend class ComputeGraph;

public:
ExecuteNode(ValueRef input, ValueRef output)
: outputs_{output}, inputs_{input} {}

ExecuteNode(
const api::ShaderInfo& shader,
const api::utils::uvec3& global_workgroup_size,
Expand All @@ -49,21 +46,19 @@ class ExecuteNode {
inputs_(inputs),
params_(std::move(params)) {}

virtual ~ExecuteNode() = default;
~ExecuteNode() = default;

void encode(ComputeGraph* graph);

protected:
// TODO: Consider making members const after we remove StagingNode.
api::ShaderInfo shader_;
api::utils::uvec3 global_workgroup_size_;
api::utils::uvec3 local_workgroup_size_;
std::vector<ValueRef> outputs_;
std::vector<ValueRef> inputs_;
const api::ShaderInfo shader_;
const api::utils::uvec3 global_workgroup_size_;
const api::utils::uvec3 local_workgroup_size_;
const std::vector<ValueRef> outputs_;
const std::vector<ValueRef> inputs_;
// TODO(T180906086): pass multiple buffers and index with ValueRef.
// TODO(T180906457): allow re-computing param buffers.
api::UniformParamsBuffer params_;

public:
virtual void encode(ComputeGraph* graph);
};

} // namespace vulkan
Expand Down
12 changes: 10 additions & 2 deletions backends/vulkan/runtime/graph/ops/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ void bind_tensor_to_descriptor_set(
}
}

/*
 * Binds the VkBuffer backing a staging buffer to slot `idx` of the given
 * descriptor set. Unlike bind_tensor_to_descriptor_set, no pipeline barrier
 * is involved here: staging buffers are plain storage buffers, so a direct
 * buffer bind is sufficient.
 */
void bind_staging_to_descriptor_set(
    api::StorageBuffer& staging,
    api::DescriptorSet& descriptor_set,
    const uint32_t idx) {
  auto& ssbo = staging.buffer();
  descriptor_set.bind(idx, ssbo);
}

uint32_t bind_values_to_descriptor_set(
ComputeGraph* graph,
const std::vector<ValueRef>& args,
Expand All @@ -48,9 +55,10 @@ uint32_t bind_values_to_descriptor_set(
for (auto& arg : args) {
Value& val = graph->get_val(arg);
if (val.isTensor()) {
vTensor& tensor = val.toTensor();
bind_tensor_to_descriptor_set(
tensor, pipeline_barrier, accessType, descriptor_set, idx++);
val.toTensor(), pipeline_barrier, accessType, descriptor_set, idx++);
} else if (val.isStaging()) {
bind_staging_to_descriptor_set(val.toStaging(), descriptor_set, idx++);
} else {
VK_THROW("Unsupported type: ", val.type());
}
Expand Down
5 changes: 5 additions & 0 deletions backends/vulkan/runtime/graph/ops/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ void bind_tensor_to_descriptor_set(
api::DescriptorSet& descriptor_set,
const uint32_t idx);

void bind_staging_to_descriptor_set(
api::StorageBuffer& staging,
api::DescriptorSet& descriptor_set,
const uint32_t idx);

uint32_t bind_values_to_descriptor_set(
ComputeGraph* graph,
const std::vector<ValueRef>& args,
Expand Down
210 changes: 180 additions & 30 deletions backends/vulkan/runtime/graph/ops/impl/Staging.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include <executorch/backends/vulkan/runtime/graph/ops/impl/Staging.h>

#include <ATen/native/vulkan/impl/Common.h>
#include <ATen/native/vulkan/impl/Packing.h>

namespace at {
Expand Down Expand Up @@ -72,7 +73,7 @@ void encode_copy_to_vtensor(
api::Context* context,
api::StorageBuffer& staging,
vTensor& tensor) {
api::ShaderInfo shader = packing::get_nchw_to_image_shader(tensor);
api::ShaderInfo shader = get_nchw_to_image_shader(tensor);
api::PipelineBarrier pipeline_barrier{};
packing::record_nchw_to_image_op(
context,
Expand All @@ -83,41 +84,190 @@ void encode_copy_to_vtensor(
VK_NULL_HANDLE);
}

void encode_copy_from_vtensor(
api::Context* context,
vTensor& tensor,
api::StorageBuffer& staging) {
api::ShaderInfo shader = packing::get_image_to_nchw_shader(tensor);
api::PipelineBarrier pipeline_barrier{};
packing::record_image_to_nchw_op(
context,
// Uniform parameter block passed to the staging <-> image copy shaders (see
// create_staging_params below for how each field is derived).
// NOTE(review): field order presumably must match the uniform block layout
// declared in the shaders — verify before reordering.
struct StagingParams final {
  // Tensor extents as an ivec3 (from vTensor::extents()).
  api::utils::ivec3 extents;
  // Number of texels per channel plane: height * width.
  int32_t plane_size;
  // {ceil(channels / 4), channels} — packed channel depth and raw channel count.
  api::utils::ivec2 channel_info;
};

/*
 * Builds the StagingParams uniform block for a tensor: extents straight from
 * the tensor, plane_size = H * W, and channel_info = {ceil(C / 4), C}.
 */
StagingParams create_staging_params(const vTensor& t) {
  const int32_t h = api::utils::safe_downcast<int32_t>(dim_at<Dim4D::Height>(t));
  const int32_t w = api::utils::safe_downcast<int32_t>(dim_at<Dim4D::Width>(t));
  const int32_t c =
      api::utils::safe_downcast<int32_t>(dim_at<Dim4D::Channel>(t));

  StagingParams sp{};
  sp.extents = api::utils::make_ivec3(t.extents());
  sp.plane_size = h * w;
  sp.channel_info = {api::utils::div_up(c, 4), c};
  return sp;
}

void add_staging_to_tensor_node(
ComputeGraph& graph,
const ValueRef in_staging,
const ValueRef out_tensor) {
vTensor& t_out = graph.get_val(out_tensor).toTensor();
VK_CHECK_COND(graph.get_val(in_staging).isStaging());

api::ShaderInfo shader = get_nchw_to_image_shader(t_out);

api::utils::uvec3 global_size = t_out.extents();
api::utils::uvec3 local_size = adaptive_work_group_size(global_size);

api::UniformParamsBuffer params(
graph.context(), create_staging_params(t_out));

graph.execute_nodes().emplace_back(new ExecuteNode(
shader,
tensor,
staging.buffer(),
pipeline_barrier,
VK_NULL_HANDLE);
global_size,
local_size,
{out_tensor},
{in_staging},
std::move(params)));
}

StagingNode::StagingNode(ValueRef from, ValueRef to) : ExecuteNode(from, to) {}
void add_tensor_to_staging_node(
ComputeGraph& graph,
const ValueRef in_tensor,
const ValueRef out_staging) {
vTensor& t_in = graph.get_val(in_tensor).toTensor();
VK_CHECK_COND(graph.get_val(out_staging).isStaging());

void StagingNode::encode(ComputeGraph* graph) {
Value& in_val = graph->get_val(inputs_[0]);
Value& out_val = graph->get_val(outputs_[0]);
api::ShaderInfo shader = get_image_to_nchw_shader(t_in);

api::utils::uvec3 global_size = t_in.extents();
api::utils::uvec3 local_size = adaptive_work_group_size(global_size);

StagingParams sp = create_staging_params(t_in);
api::UniformParamsBuffer params(graph.context(), sp);

// TODO(T181194784): These are workgroup sizes for special cases. Refactor the
// calculation of workgroup sizes to a standalone function. We should use
// scalar type to get the shader name, and use the shader name to get the
// workgroup size.
if (t_in.dtype() == api::ScalarType::QUInt8 ||
t_in.dtype() == api::ScalarType::QInt8 || t_in.dtype() == api::kBool) {
if (sp.plane_size % 4 == 0) {
global_size.data[0u] = sp.plane_size / 4;
global_size.data[1u] = 1;
local_size.data[0u] *= local_size.data[1u];
local_size.data[1u] = 1;
} else {
uint32_t numel = t_in.numel();
global_size = {api::utils::div_up(numel, uint32_t(4)), 1u, 1u};
local_size = {64u, 1u, 1u};
}
}

graph.execute_nodes().emplace_back(new ExecuteNode(
shader,
global_size,
local_size,
{in_tensor},
{out_staging},
std::move(params)));
}

api::ShaderInfo get_nchw_to_image_shader(const vTensor& v_dst) {
if (v_dst.is_quantized()) {
switch (v_dst.storage_type()) {
case api::StorageType::TEXTURE_3D:
switch (v_dst.dtype()) {
case api::ScalarType::QUInt8:
return VK_KERNEL(nchw_to_image_uint8);
case api::ScalarType::QInt8:
return VK_KERNEL(nchw_to_image_int8);
case api::ScalarType::QInt32:
return VK_KERNEL(nchw_to_image_int32);
default:
VK_THROW(
"Vulkan quantization currently not supported for dtype ",
v_dst.dtype());
}
case api::StorageType::TEXTURE_2D:
switch (v_dst.dtype()) {
case api::ScalarType::QUInt8:
return VK_KERNEL(nchw_to_image2d_uint8);
case api::ScalarType::QInt8:
return VK_KERNEL(nchw_to_image2d_int8);
case api::ScalarType::QInt32:
return VK_KERNEL(nchw_to_image2d_int32);
default:
VK_THROW(
"Vulkan quantization currently not supported for dtype ",
v_dst.dtype());
}
default:
VK_THROW("No kernel available!");
case api::StorageType::BUFFER:
case api::StorageType::UNKNOWN:
VK_THROW("Requested storage type must be a texture type.");
}
}

if (v_dst.dtype() == api::kFloat) {
switch (v_dst.storage_type()) {
case api::StorageType::TEXTURE_3D:
return VK_KERNEL(nchw_to_image);
case api::StorageType::TEXTURE_2D:
return VK_KERNEL(nchw_to_image2d);
default:
VK_THROW("No kernel available!");
}
} else if (v_dst.dtype() == api::kBool) {
switch (v_dst.storage_type()) {
case api::StorageType::TEXTURE_3D:
return VK_KERNEL(nchw_to_image_bool);
default:
VK_THROW("No kernel available!");
}
} else {
VK_THROW("Unsupported dtype!");
}
}

api::ShaderInfo get_image_to_nchw_shader(const vTensor& v_src) {
if (v_src.is_quantized() || v_src.dtype() == api::kBool) {
auto plane_size =
dim_at<Dim4D::Height>(v_src) * dim_at<Dim4D::Width>(v_src);
switch (v_src.storage_type()) {
case api::StorageType::TEXTURE_3D:
switch (v_src.dtype()) {
case api::ScalarType::QUInt8:
case api::ScalarType::QInt8:
case api::kBool:
return plane_size % 4 == 0 ? VK_KERNEL(image_to_nchw_quantized_mul4)
: VK_KERNEL(image_to_nchw_uint);
case api::ScalarType::QInt32:
return VK_KERNEL(image_to_nchw_int32);
default:
VK_THROW(
"Vulkan quantization currently not supported for dtype ",
v_src.dtype());
}
default:
VK_THROW("No kernel available!");
case api::StorageType::BUFFER:
case api::StorageType::UNKNOWN:
VK_THROW("Requested storage type must be a texture type.");
}
}

if (in_val.isStaging() && out_val.isTensor()) {
api::StorageBuffer& from_staging = graph->get_val(inputs_[0]).toStaging();
vTensor& to_tensor = graph->get_val(outputs_[0]).toTensor();
encode_copy_to_vtensor(graph->context(), from_staging, to_tensor);
} else if (in_val.isTensor() && out_val.isStaging()) {
vTensor& from_tensor = graph->get_val(inputs_[0]).toTensor();
api::StorageBuffer& to_staging = graph->get_val(outputs_[0]).toStaging();
encode_copy_from_vtensor(graph->context(), from_tensor, to_staging);
if (v_src.dtype() == api::kFloat) {
switch (v_src.storage_type()) {
case api::StorageType::TEXTURE_3D:
return VK_KERNEL(image_to_nchw);
case api::StorageType::TEXTURE_2D:
return VK_KERNEL(image2d_to_nchw);
default:
VK_THROW("No kernel available!");
}
} else {
VK_THROW(
"Unexpected input value type ",
in_val.type(),
" and output value type ",
out_val.type());
VK_THROW("Unsupported dtype!");
}
}

Expand Down
32 changes: 19 additions & 13 deletions backends/vulkan/runtime/graph/ops/impl/Staging.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

#include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>

#include <string.h>
#include <cstring>

namespace at {
namespace native {
Expand Down Expand Up @@ -76,20 +76,26 @@ void encode_copy_to_vtensor(
api::Context* context,
api::StorageBuffer& staging,
vTensor& tensor);
void encode_copy_from_vtensor(
api::Context* context,
vTensor& tensor,
api::StorageBuffer& staging);

/*
* OpNode that allows copying data into and out of a staging buffer.
*/
class StagingNode : public virtual ExecuteNode {
public:
explicit StagingNode(ValueRef from, ValueRef to);
//
// Functions to initialize ExecuteNode
//

void add_staging_to_tensor_node(
ComputeGraph& graph,
const ValueRef in_staging,
const ValueRef out_tensor);
void add_tensor_to_staging_node(
ComputeGraph& graph,
const ValueRef in_tensor,
const ValueRef out_staging);

//
// Functions to get shaders
//

void encode(ComputeGraph* graph) override;
};
api::ShaderInfo get_nchw_to_image_shader(const vTensor& v_dst);
api::ShaderInfo get_image_to_nchw_shader(const vTensor& v_src);

} // namespace vulkan
} // namespace native
Expand Down
Loading