Skip to content

[ET-VK][ez] Replace std::stringstream with std::string for Shader names #2964

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions backends/vulkan/runtime/graph/ops/PrepackNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,10 @@
namespace vkcompute {

api::ShaderInfo get_noop_shader(ComputeGraph& graph, const ValueRef packed) {
std::stringstream noop_shader_name;
noop_shader_name << "no_op";
apply_ndim_suffix(noop_shader_name, graph.get_val(packed).toTensor());
apply_dtype_suffix(noop_shader_name, graph.get_val(packed).toTensor());
return VK_KERNEL_FROM_STR(noop_shader_name.str());
std::string noop_shader_name("no_op");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this path doesn't use reserve, so on platforms without SSO this implementation could be worse. (It looks like the string data fits in 16 bytes otherwise, in which case you're good on libstdc++ and libc++, but it would be easy to break that in the future.)

add_ndim_suffix(noop_shader_name, graph.get_val(packed).toTensor());
add_dtype_suffix(noop_shader_name, graph.get_val(packed).toTensor());
return VK_KERNEL_FROM_STR(noop_shader_name);
}

PrepackNode::PrepackNode(
Expand Down
11 changes: 6 additions & 5 deletions backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,15 @@ void add_binary_op_node(
const api::utils::ivec2 broadcast_params =
create_broadcast_params(t_in1, t_in2);

std::stringstream kernel_name;
kernel_name << "binary_" << op_name;
apply_memory_layout_suffix(kernel_name, t_out);
apply_dtype_suffix(kernel_name, t_out);
std::string kernel_name("binary_");
kernel_name.reserve(kShaderNameReserve);
Comment on lines +78 to +79
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: you would be slightly better off reserving before writing "binary_" into the string; as written, "binary_" might get written into the inline storage once before getting copied to the heap if the compiler doesn't catch it. surprisingly, this was very easy to demonstrate: clang 17 gets it wrong because reserve() isn't getting inlined. https://godbolt.org/z/jKcf3KGG1

kernel_name += op_name;
add_memory_layout_suffix(kernel_name, t_out);
add_dtype_suffix(kernel_name, t_out);

graph.execute_nodes().emplace_back(new ExecuteNode(
graph,
VK_KERNEL_FROM_STR(kernel_name.str()),
VK_KERNEL_FROM_STR(kernel_name),
global_size,
local_size,
// Inputs and Outputs
Expand Down
23 changes: 12 additions & 11 deletions backends/vulkan/runtime/graph/ops/impl/Conv2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,40 +90,41 @@ api::ShaderInfo get_conv2d_shader(
const bool prepack_weights,
const Conv2dMethod method,
const ValueRef weight) {
std::stringstream kernel_name;
std::string kernel_name;
kernel_name.reserve(kShaderNameReserve);
switch (method) {
case Conv2dMethod::Depthwise:
kernel_name << "conv2d_dw";
kernel_name = "conv2d_dw";
if (!prepack_weights) {
const auto& weight_sizes = graph.get_val(weight).toTensorRef().sizes;
if (weight_sizes.at(2) == 3 && weight_sizes.at(3) == 3) {
kernel_name << "_output_tile_3x3";
kernel_name += "_output_tile_3x3";
}
if (weight_sizes.at(2) == 5 && weight_sizes.at(3) == 5) {
kernel_name << "_output_tile_5x5";
kernel_name += "_output_tile_5x5";
}
}
break;
case Conv2dMethod::Pointwise:
if (prepack_weights) {
kernel_name << "conv2d";
kernel_name = "conv2d";
} else {
kernel_name << "conv2d_pw";
kernel_name = "conv2d_pw";
}
break;
case Conv2dMethod::SlidingWindow:
kernel_name << "conv2d";
kernel_name = "conv2d";
break;
case Conv2dMethod::Transposed:
kernel_name << "conv_transpose2d";
kernel_name = "conv_transpose2d";
break;
}
if (prepack_weights) {
kernel_name << "_prepack_weights";
kernel_name += "_prepack_weights";
}
apply_dtype_suffix(kernel_name, t_out);
add_dtype_suffix(kernel_name, t_out);

return VK_KERNEL_FROM_STR(kernel_name.str());
return VK_KERNEL_FROM_STR(kernel_name);
}

std::vector<int64_t> get_final_sizes(
Expand Down
12 changes: 6 additions & 6 deletions backends/vulkan/runtime/graph/ops/impl/MatMul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,15 @@ void add_matmul_node(
api::utils::uvec3 global_size = t_out.virtual_extents();
api::utils::uvec3 local_size = adaptive_work_group_size(global_size);

std::stringstream kernel_name;
kernel_name << "matmul";
apply_memory_layout_suffix(kernel_name, t_mat1);
apply_memory_layout_suffix(kernel_name, t_mat2);
apply_dtype_suffix(kernel_name, t_out);
std::string kernel_name("matmul");
kernel_name.reserve(kShaderNameReserve);
add_memory_layout_suffix(kernel_name, t_mat1);
add_memory_layout_suffix(kernel_name, t_mat2);
add_dtype_suffix(kernel_name, t_out);

graph.execute_nodes().emplace_back(new ExecuteNode(
graph,
VK_KERNEL_FROM_STR(kernel_name.str()),
VK_KERNEL_FROM_STR(kernel_name),
global_size,
local_size,
// Inputs and Outputs
Expand Down
7 changes: 3 additions & 4 deletions backends/vulkan/runtime/graph/ops/impl/Pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,8 @@ void add_max_pool2d_node(
api::utils::uvec3 global_size = t_out.virtual_extents();
api::utils::uvec3 local_size = adaptive_work_group_size(global_size);

std::stringstream kernel_name;
kernel_name << "max_pool2d";
apply_dtype_suffix(kernel_name, t_out);
std::string kernel_name("max_pool2d");
add_dtype_suffix(kernel_name, t_out);

KernelParams kernel_params = create_kernel_params(
graph,
Expand All @@ -87,7 +86,7 @@ void add_max_pool2d_node(

graph.execute_nodes().emplace_back(new ExecuteNode(
graph,
VK_KERNEL_FROM_STR(kernel_name.str()),
VK_KERNEL_FROM_STR(kernel_name),
global_size,
local_size,
// Inputs and Outputs
Expand Down
10 changes: 5 additions & 5 deletions backends/vulkan/runtime/graph/ops/impl/Sum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,17 @@ void add_sum_dim_node(
api::utils::uvec3 global_size = t_out.virtual_extents();
api::utils::uvec3 local_size = adaptive_work_group_size(global_size);

std::stringstream kernel_name;
kernel_name << "sum_dim";
std::string kernel_name("sum_dim");
kernel_name.reserve(kShaderNameReserve);
if (keepdim) {
kernel_name << "_keepdim";
kernel_name += "_keepdim";
}

apply_dtype_suffix(kernel_name, t_out);
add_dtype_suffix(kernel_name, t_out);

graph.execute_nodes().emplace_back(new ExecuteNode(
graph,
VK_KERNEL_FROM_STR(kernel_name.str()),
VK_KERNEL_FROM_STR(kernel_name),
global_size,
local_size,
// Inputs and Outputs
Expand Down
7 changes: 3 additions & 4 deletions backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,12 @@ void add_unary_op_node(
api::utils::uvec3 global_size = t_out.virtual_extents();
api::utils::uvec3 local_size = adaptive_work_group_size(global_size);

std::stringstream kernel_name;
kernel_name << op_name;
apply_dtype_suffix(kernel_name, t_out);
std::string kernel_name(op_name);
add_dtype_suffix(kernel_name, t_out);

graph.execute_nodes().emplace_back(new ExecuteNode(
graph,
VK_KERNEL_FROM_STR(kernel_name.str()),
VK_KERNEL_FROM_STR(kernel_name),
global_size,
local_size,
// Inputs and Outputs
Expand Down
24 changes: 11 additions & 13 deletions backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,47 +10,45 @@

namespace vkcompute {

void apply_dtype_suffix(std::stringstream& kernel_name, const vTensor& tensor) {
void add_dtype_suffix(std::string& kernel_name, const vTensor& tensor) {
switch (tensor.image().format()) {
case VK_FORMAT_R32G32B32A32_SFLOAT:
kernel_name << "_float";
kernel_name += "_float";
break;
case VK_FORMAT_R16G16B16A16_SFLOAT:
kernel_name << "_half";
kernel_name += "_half";
break;
case VK_FORMAT_R32G32B32A32_SINT:
kernel_name << "_int";
kernel_name += "_int";
break;
default:
break;
}
}

void apply_ndim_suffix(std::stringstream& kernel_name, const vTensor& tensor) {
void add_ndim_suffix(std::string& kernel_name, const vTensor& tensor) {
switch (tensor.storage_type()) {
case api::kTexture3D:
kernel_name << "_3d";
kernel_name += "_3d";
break;
case api::kTexture2D:
kernel_name << "_2d";
kernel_name += "_2d";
break;
default:
break;
}
}

void apply_memory_layout_suffix(
std::stringstream& kernel_name,
const vTensor& tensor) {
void add_memory_layout_suffix(std::string& kernel_name, const vTensor& tensor) {
switch (tensor.gpu_memory_layout()) {
case api::kChannelsPacked:
kernel_name << "_C_packed";
kernel_name += "_C_packed";
break;
case api::kHeightPacked:
kernel_name << "_H_packed";
kernel_name += "_H_packed";
break;
case api::kWidthPacked:
kernel_name << "_W_packed";
kernel_name += "_W_packed";
break;
default:
break;
Expand Down
12 changes: 6 additions & 6 deletions backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@

#include <executorch/backends/vulkan/runtime/api/api.h>

#include <sstream>
#include <string>

namespace vkcompute {

void apply_dtype_suffix(std::stringstream& kernel_name, const vTensor& tensor);
constexpr size_t kShaderNameReserve = 64u;

void apply_ndim_suffix(std::stringstream& kernel_name, const vTensor& tensor);
void add_dtype_suffix(std::string& kernel_name, const vTensor& tensor);

void apply_memory_layout_suffix(
std::stringstream& kernel_name,
const vTensor& tensor);
void add_ndim_suffix(std::string& kernel_name, const vTensor& tensor);

void add_memory_layout_suffix(std::string& kernel_name, const vTensor& tensor);

} // namespace vkcompute
26 changes: 14 additions & 12 deletions backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,47 +94,49 @@ api::ShaderInfo get_nchw_to_image_shader(const vTensor& v_dst) {
VK_THROW("Quantized Tensors are currently not supported!");
}

std::stringstream kernel_name;
std::string kernel_name;
kernel_name.reserve(kShaderNameReserve);

switch (v_dst.storage_type()) {
case api::kTexture3D:
kernel_name << "nchw_to_image3d";
kernel_name = "nchw_to_image3d";
break;
case api::kTexture2D:
kernel_name << "nchw_to_image2d";
kernel_name = "nchw_to_image2d";
break;
default:
VK_THROW("No kernel available!");
}

apply_memory_layout_suffix(kernel_name, v_dst);
apply_dtype_suffix(kernel_name, v_dst);
add_memory_layout_suffix(kernel_name, v_dst);
add_dtype_suffix(kernel_name, v_dst);

return VK_KERNEL_FROM_STR(kernel_name.str());
return VK_KERNEL_FROM_STR(kernel_name);
}

api::ShaderInfo get_image_to_nchw_shader(const vTensor& v_src) {
if (v_src.is_quantized()) {
VK_THROW("Quantized Tensors are currently not supported!");
}

std::stringstream kernel_name;
std::string kernel_name;
kernel_name.reserve(kShaderNameReserve);

switch (v_src.storage_type()) {
case api::kTexture3D:
kernel_name << "image3d_to_nchw";
kernel_name = "image3d_to_nchw";
break;
case api::kTexture2D:
kernel_name << "image2d_to_nchw";
kernel_name = "image2d_to_nchw";
break;
default:
VK_THROW("No kernel available!");
}

apply_memory_layout_suffix(kernel_name, v_src);
apply_dtype_suffix(kernel_name, v_src);
add_memory_layout_suffix(kernel_name, v_src);
add_dtype_suffix(kernel_name, v_src);

return VK_KERNEL_FROM_STR(kernel_name.str());
return VK_KERNEL_FROM_STR(kernel_name);
}

} // namespace vkcompute
19 changes: 9 additions & 10 deletions backends/vulkan/test/utils/test_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,15 @@ void record_conv2d_prepack_weights_op(
const bool transposed) {
api::PipelineBarrier pipeline_barrier{};

std::stringstream kernel_name;
std::string kernel_name;
if (transposed) {
kernel_name << "conv_transpose2d";
kernel_name = "conv_transpose2d";
} else {
kernel_name << "conv2d";
kernel_name = "conv2d";
}
kernel_name << "_prepack_weights";
apply_dtype_suffix(kernel_name, v_dst);
api::ShaderInfo shader = VK_KERNEL_FROM_STR(kernel_name.str());
kernel_name += "_prepack_weights";
add_dtype_suffix(kernel_name, v_dst);
api::ShaderInfo shader = VK_KERNEL_FROM_STR(kernel_name);

api::UniformParamsBuffer original_sizes_ubo(
context, api::utils::make_ivec4(original_sizes, /*reverse = */ true));
Expand Down Expand Up @@ -100,13 +100,12 @@ void record_binary_op(
vTensor& v_in1,
vTensor& v_in2,
vTensor& v_dst) {
std::stringstream kernel_name;
kernel_name << "binary_" << op_name << "_nobroadcast__test";
apply_dtype_suffix(kernel_name, v_dst);
std::string kernel_name = "binary_" + op_name + "_nobroadcast__test";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

building a string with + this way isn't efficient; reserve and call append(). for the record, there are primitives for this in more modern C++ like folly::to and fmt::format and eventually std::format.

add_dtype_suffix(kernel_name, v_dst);

api::PipelineBarrier pipeline_barrier{};
context->submit_compute_job(
VK_KERNEL_FROM_STR(kernel_name.str()),
VK_KERNEL_FROM_STR(kernel_name),
pipeline_barrier,
v_dst.virtual_extents(),
adaptive_work_group_size(v_dst.virtual_extents()),
Expand Down
16 changes: 7 additions & 9 deletions backends/vulkan/test/vulkan_compute_api_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,8 @@ TEST_F(VulkanComputeAPITest, update_params_between_submit) {
std::vector<int64_t> sizes = {4, 4, 2};
vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);

std::stringstream kernel_name;
kernel_name << "fill_texture__test";
apply_dtype_suffix(kernel_name, a);
std::string kernel_name("fill_texture__test");
add_dtype_suffix(kernel_name, a);
Comment on lines +52 to +53
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto ditto


struct Params final {
api::utils::ivec3 size;
Expand All @@ -70,7 +69,7 @@ TEST_F(VulkanComputeAPITest, update_params_between_submit) {
{
api::PipelineBarrier pipeline_barrier{};
api::context()->submit_compute_job(
VK_KERNEL_FROM_STR(kernel_name.str()),
VK_KERNEL_FROM_STR(kernel_name),
pipeline_barrier,
{4, 4, 4},
{4, 4, 4},
Expand Down Expand Up @@ -748,15 +747,14 @@ void run_from_gpu_test(
vTensor vten =
vTensor(api::context(), sizes, api::kFloat, storage_type, memory_layout);

std::stringstream kernel_name;
kernel_name << "idx_fill_texture";
apply_memory_layout_suffix(kernel_name, vten);
apply_dtype_suffix(kernel_name, vten);
std::string kernel_name("idx_fill_texture");
add_memory_layout_suffix(kernel_name, vten);
add_dtype_suffix(kernel_name, vten);
Comment on lines +750 to +752
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto missing reserve; inline size is as little as 16 bytes with libstdc++ (which I don't know if we use).


{
api::PipelineBarrier pipeline_barrier{};
api::context()->submit_compute_job(
VK_KERNEL_FROM_STR(kernel_name.str()),
VK_KERNEL_FROM_STR(kernel_name),
pipeline_barrier,
vten.virtual_extents(),
{4, 4, 4},
Expand Down