Skip to content

Commit a983ebc

Browse files
SS-JIA authored and facebook-github-bot committed
Replace std::stringstream with std::string for Shader names (#2964)
Summary:
Pull Request resolved: #2964

## Context

Some research into efficient string concatenation suggests that streams in C++ are not very efficient. The best way to concatenate strings seems to be creating a `std::string` and reserving sufficient capacity for it up front. This diff deprecates the usage of `std::stringstream` when constructing kernel names in favor of using `std::string` directly.

Reviewed By: copyrightly

Differential Revision: D55951475

fbshipit-source-id: a1a584669e80984b85d11b7d6d4f7593290e562b
1 parent d993797 commit a983ebc

File tree

12 files changed

+86
-90
lines changed

12 files changed

+86
-90
lines changed

backends/vulkan/runtime/graph/ops/PrepackNode.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,10 @@
1717
namespace vkcompute {
1818

1919
api::ShaderInfo get_noop_shader(ComputeGraph& graph, const ValueRef packed) {
20-
std::stringstream noop_shader_name;
21-
noop_shader_name << "no_op";
22-
apply_ndim_suffix(noop_shader_name, graph.get_val(packed).toTensor());
23-
apply_dtype_suffix(noop_shader_name, graph.get_val(packed).toTensor());
24-
return VK_KERNEL_FROM_STR(noop_shader_name.str());
20+
std::string noop_shader_name("no_op");
21+
add_ndim_suffix(noop_shader_name, graph.get_val(packed).toTensor());
22+
add_dtype_suffix(noop_shader_name, graph.get_val(packed).toTensor());
23+
return VK_KERNEL_FROM_STR(noop_shader_name);
2524
}
2625

2726
PrepackNode::PrepackNode(

backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,14 +75,15 @@ void add_binary_op_node(
7575
const api::utils::ivec2 broadcast_params =
7676
create_broadcast_params(t_in1, t_in2);
7777

78-
std::stringstream kernel_name;
79-
kernel_name << "binary_" << op_name;
80-
apply_memory_layout_suffix(kernel_name, t_out);
81-
apply_dtype_suffix(kernel_name, t_out);
78+
std::string kernel_name("binary_");
79+
kernel_name.reserve(kShaderNameReserve);
80+
kernel_name += op_name;
81+
add_memory_layout_suffix(kernel_name, t_out);
82+
add_dtype_suffix(kernel_name, t_out);
8283

8384
graph.execute_nodes().emplace_back(new ExecuteNode(
8485
graph,
85-
VK_KERNEL_FROM_STR(kernel_name.str()),
86+
VK_KERNEL_FROM_STR(kernel_name),
8687
global_size,
8788
local_size,
8889
// Inputs and Outputs

backends/vulkan/runtime/graph/ops/impl/Conv2d.cpp

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -94,40 +94,41 @@ api::ShaderInfo get_conv2d_shader(
9494
const bool prepack_weights,
9595
const Conv2dMethod method,
9696
const ValueRef weight) {
97-
std::stringstream kernel_name;
97+
std::string kernel_name;
98+
kernel_name.reserve(kShaderNameReserve);
9899
switch (method) {
99100
case Conv2dMethod::Depthwise:
100-
kernel_name << "conv2d_dw";
101+
kernel_name = "conv2d_dw";
101102
if (!prepack_weights) {
102103
const auto& weight_sizes = graph.get_val(weight).toTensorRef().sizes;
103104
if (weight_sizes.at(2) == 3 && weight_sizes.at(3) == 3) {
104-
kernel_name << "_output_tile_3x3";
105+
kernel_name += "_output_tile_3x3";
105106
}
106107
if (weight_sizes.at(2) == 5 && weight_sizes.at(3) == 5) {
107-
kernel_name << "_output_tile_5x5";
108+
kernel_name += "_output_tile_5x5";
108109
}
109110
}
110111
break;
111112
case Conv2dMethod::Pointwise:
112113
if (prepack_weights) {
113-
kernel_name << "conv2d";
114+
kernel_name = "conv2d";
114115
} else {
115-
kernel_name << "conv2d_pw";
116+
kernel_name = "conv2d_pw";
116117
}
117118
break;
118119
case Conv2dMethod::SlidingWindow:
119-
kernel_name << "conv2d";
120+
kernel_name = "conv2d";
120121
break;
121122
case Conv2dMethod::Transposed:
122-
kernel_name << "conv_transpose2d";
123+
kernel_name = "conv_transpose2d";
123124
break;
124125
}
125126
if (prepack_weights) {
126-
kernel_name << "_prepack_weights";
127+
kernel_name += "_prepack_weights";
127128
}
128-
apply_dtype_suffix(kernel_name, t_out);
129+
add_dtype_suffix(kernel_name, t_out);
129130

130-
return VK_KERNEL_FROM_STR(kernel_name.str());
131+
return VK_KERNEL_FROM_STR(kernel_name);
131132
}
132133

133134
std::vector<int64_t> get_final_sizes(

backends/vulkan/runtime/graph/ops/impl/MatMul.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,15 +78,15 @@ void add_matmul_node(
7878
api::utils::uvec3 global_size = t_out.virtual_extents();
7979
api::utils::uvec3 local_size = adaptive_work_group_size(global_size);
8080

81-
std::stringstream kernel_name;
82-
kernel_name << "matmul";
83-
apply_memory_layout_suffix(kernel_name, t_mat1);
84-
apply_memory_layout_suffix(kernel_name, t_mat2);
85-
apply_dtype_suffix(kernel_name, t_out);
81+
std::string kernel_name("matmul");
82+
kernel_name.reserve(kShaderNameReserve);
83+
add_memory_layout_suffix(kernel_name, t_mat1);
84+
add_memory_layout_suffix(kernel_name, t_mat2);
85+
add_dtype_suffix(kernel_name, t_out);
8686

8787
graph.execute_nodes().emplace_back(new ExecuteNode(
8888
graph,
89-
VK_KERNEL_FROM_STR(kernel_name.str()),
89+
VK_KERNEL_FROM_STR(kernel_name),
9090
global_size,
9191
local_size,
9292
// Inputs and Outputs

backends/vulkan/runtime/graph/ops/impl/Pool.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,8 @@ void add_max_pool2d_node(
7373
api::utils::uvec3 global_size = t_out.virtual_extents();
7474
api::utils::uvec3 local_size = adaptive_work_group_size(global_size);
7575

76-
std::stringstream kernel_name;
77-
kernel_name << "max_pool2d";
78-
apply_dtype_suffix(kernel_name, t_out);
76+
std::string kernel_name("max_pool2d");
77+
add_dtype_suffix(kernel_name, t_out);
7978

8079
KernelParams kernel_params = create_kernel_params(
8180
graph,
@@ -87,7 +86,7 @@ void add_max_pool2d_node(
8786

8887
graph.execute_nodes().emplace_back(new ExecuteNode(
8988
graph,
90-
VK_KERNEL_FROM_STR(kernel_name.str()),
89+
VK_KERNEL_FROM_STR(kernel_name),
9190
global_size,
9291
local_size,
9392
// Inputs and Outputs

backends/vulkan/runtime/graph/ops/impl/Sum.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,17 +71,17 @@ void add_sum_dim_node(
7171
api::utils::uvec3 global_size = t_out.virtual_extents();
7272
api::utils::uvec3 local_size = adaptive_work_group_size(global_size);
7373

74-
std::stringstream kernel_name;
75-
kernel_name << "sum_dim";
74+
std::string kernel_name("sum_dim");
75+
kernel_name.reserve(kShaderNameReserve);
7676
if (keepdim) {
77-
kernel_name << "_keepdim";
77+
kernel_name += "_keepdim";
7878
}
7979

80-
apply_dtype_suffix(kernel_name, t_out);
80+
add_dtype_suffix(kernel_name, t_out);
8181

8282
graph.execute_nodes().emplace_back(new ExecuteNode(
8383
graph,
84-
VK_KERNEL_FROM_STR(kernel_name.str()),
84+
VK_KERNEL_FROM_STR(kernel_name),
8585
global_size,
8686
local_size,
8787
// Inputs and Outputs

backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,12 @@ void add_unary_op_node(
4444
api::utils::uvec3 global_size = t_out.virtual_extents();
4545
api::utils::uvec3 local_size = adaptive_work_group_size(global_size);
4646

47-
std::stringstream kernel_name;
48-
kernel_name << op_name;
49-
apply_dtype_suffix(kernel_name, t_out);
47+
std::string kernel_name(op_name);
48+
add_dtype_suffix(kernel_name, t_out);
5049

5150
graph.execute_nodes().emplace_back(new ExecuteNode(
5251
graph,
53-
VK_KERNEL_FROM_STR(kernel_name.str()),
52+
VK_KERNEL_FROM_STR(kernel_name),
5453
global_size,
5554
local_size,
5655
// Inputs and Outputs

backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.cpp

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,47 +10,45 @@
1010

1111
namespace vkcompute {
1212

13-
void apply_dtype_suffix(std::stringstream& kernel_name, const vTensor& tensor) {
13+
void add_dtype_suffix(std::string& kernel_name, const vTensor& tensor) {
1414
switch (tensor.image().format()) {
1515
case VK_FORMAT_R32G32B32A32_SFLOAT:
16-
kernel_name << "_float";
16+
kernel_name += "_float";
1717
break;
1818
case VK_FORMAT_R16G16B16A16_SFLOAT:
19-
kernel_name << "_half";
19+
kernel_name += "_half";
2020
break;
2121
case VK_FORMAT_R32G32B32A32_SINT:
22-
kernel_name << "_int";
22+
kernel_name += "_int";
2323
break;
2424
default:
2525
break;
2626
}
2727
}
2828

29-
void apply_ndim_suffix(std::stringstream& kernel_name, const vTensor& tensor) {
29+
void add_ndim_suffix(std::string& kernel_name, const vTensor& tensor) {
3030
switch (tensor.storage_type()) {
3131
case api::kTexture3D:
32-
kernel_name << "_3d";
32+
kernel_name += "_3d";
3333
break;
3434
case api::kTexture2D:
35-
kernel_name << "_2d";
35+
kernel_name += "_2d";
3636
break;
3737
default:
3838
break;
3939
}
4040
}
4141

42-
void apply_memory_layout_suffix(
43-
std::stringstream& kernel_name,
44-
const vTensor& tensor) {
42+
void add_memory_layout_suffix(std::string& kernel_name, const vTensor& tensor) {
4543
switch (tensor.gpu_memory_layout()) {
4644
case api::kChannelsPacked:
47-
kernel_name << "_C_packed";
45+
kernel_name += "_C_packed";
4846
break;
4947
case api::kHeightPacked:
50-
kernel_name << "_H_packed";
48+
kernel_name += "_H_packed";
5149
break;
5250
case api::kWidthPacked:
53-
kernel_name << "_W_packed";
51+
kernel_name += "_W_packed";
5452
break;
5553
default:
5654
break;

backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,16 @@
1010

1111
#include <executorch/backends/vulkan/runtime/api/api.h>
1212

13-
#include <sstream>
13+
#include <string>
1414

1515
namespace vkcompute {
1616

17-
void apply_dtype_suffix(std::stringstream& kernel_name, const vTensor& tensor);
17+
constexpr size_t kShaderNameReserve = 64u;
1818

19-
void apply_ndim_suffix(std::stringstream& kernel_name, const vTensor& tensor);
19+
void add_dtype_suffix(std::string& kernel_name, const vTensor& tensor);
2020

21-
void apply_memory_layout_suffix(
22-
std::stringstream& kernel_name,
23-
const vTensor& tensor);
21+
void add_ndim_suffix(std::string& kernel_name, const vTensor& tensor);
22+
23+
void add_memory_layout_suffix(std::string& kernel_name, const vTensor& tensor);
2424

2525
} // namespace vkcompute

backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -100,47 +100,49 @@ api::ShaderInfo get_nchw_to_image_shader(const vTensor& v_dst) {
100100
VK_THROW("Quantized Tensors are currently not supported!");
101101
}
102102

103-
std::stringstream kernel_name;
103+
std::string kernel_name;
104+
kernel_name.reserve(kShaderNameReserve);
104105

105106
switch (v_dst.storage_type()) {
106107
case api::kTexture3D:
107-
kernel_name << "nchw_to_image3d";
108+
kernel_name = "nchw_to_image3d";
108109
break;
109110
case api::kTexture2D:
110-
kernel_name << "nchw_to_image2d";
111+
kernel_name = "nchw_to_image2d";
111112
break;
112113
default:
113114
VK_THROW("No kernel available!");
114115
}
115116

116-
apply_memory_layout_suffix(kernel_name, v_dst);
117-
apply_dtype_suffix(kernel_name, v_dst);
117+
add_memory_layout_suffix(kernel_name, v_dst);
118+
add_dtype_suffix(kernel_name, v_dst);
118119

119-
return VK_KERNEL_FROM_STR(kernel_name.str());
120+
return VK_KERNEL_FROM_STR(kernel_name);
120121
}
121122

122123
api::ShaderInfo get_image_to_nchw_shader(const vTensor& v_src) {
123124
if (v_src.is_quantized()) {
124125
VK_THROW("Quantized Tensors are currently not supported!");
125126
}
126127

127-
std::stringstream kernel_name;
128+
std::string kernel_name;
129+
kernel_name.reserve(kShaderNameReserve);
128130

129131
switch (v_src.storage_type()) {
130132
case api::kTexture3D:
131-
kernel_name << "image3d_to_nchw";
133+
kernel_name = "image3d_to_nchw";
132134
break;
133135
case api::kTexture2D:
134-
kernel_name << "image2d_to_nchw";
136+
kernel_name = "image2d_to_nchw";
135137
break;
136138
default:
137139
VK_THROW("No kernel available!");
138140
}
139141

140-
apply_memory_layout_suffix(kernel_name, v_src);
141-
apply_dtype_suffix(kernel_name, v_src);
142+
add_memory_layout_suffix(kernel_name, v_src);
143+
add_dtype_suffix(kernel_name, v_src);
142144

143-
return VK_KERNEL_FROM_STR(kernel_name.str());
145+
return VK_KERNEL_FROM_STR(kernel_name);
144146
}
145147

146148
} // namespace vkcompute

backends/vulkan/test/utils/test_utils.cpp

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,15 +63,15 @@ void record_conv2d_prepack_weights_op(
6363
const bool transposed) {
6464
api::PipelineBarrier pipeline_barrier{};
6565

66-
std::stringstream kernel_name;
66+
std::string kernel_name;
6767
if (transposed) {
68-
kernel_name << "conv_transpose2d";
68+
kernel_name = "conv_transpose2d";
6969
} else {
70-
kernel_name << "conv2d";
70+
kernel_name = "conv2d";
7171
}
72-
kernel_name << "_prepack_weights";
73-
apply_dtype_suffix(kernel_name, v_dst);
74-
api::ShaderInfo shader = VK_KERNEL_FROM_STR(kernel_name.str());
72+
kernel_name += "_prepack_weights";
73+
add_dtype_suffix(kernel_name, v_dst);
74+
api::ShaderInfo shader = VK_KERNEL_FROM_STR(kernel_name);
7575

7676
api::UniformParamsBuffer original_sizes_ubo(
7777
context, api::utils::make_ivec4(original_sizes, /*reverse = */ true));
@@ -100,13 +100,12 @@ void record_binary_op(
100100
vTensor& v_in1,
101101
vTensor& v_in2,
102102
vTensor& v_dst) {
103-
std::stringstream kernel_name;
104-
kernel_name << "binary_" << op_name << "_nobroadcast__test";
105-
apply_dtype_suffix(kernel_name, v_dst);
103+
std::string kernel_name = "binary_" + op_name + "_nobroadcast__test";
104+
add_dtype_suffix(kernel_name, v_dst);
106105

107106
api::PipelineBarrier pipeline_barrier{};
108107
context->submit_compute_job(
109-
VK_KERNEL_FROM_STR(kernel_name.str()),
108+
VK_KERNEL_FROM_STR(kernel_name),
110109
pipeline_barrier,
111110
v_dst.virtual_extents(),
112111
adaptive_work_group_size(v_dst.virtual_extents()),

backends/vulkan/test/vulkan_compute_api_test.cpp

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,8 @@ TEST_F(VulkanComputeAPITest, update_params_between_submit) {
4949
std::vector<int64_t> sizes = {4, 4, 2};
5050
vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
5151

52-
std::stringstream kernel_name;
53-
kernel_name << "fill_texture__test";
54-
apply_dtype_suffix(kernel_name, a);
52+
std::string kernel_name("fill_texture__test");
53+
add_dtype_suffix(kernel_name, a);
5554

5655
struct Params final {
5756
api::utils::ivec3 size;
@@ -70,7 +69,7 @@ TEST_F(VulkanComputeAPITest, update_params_between_submit) {
7069
{
7170
api::PipelineBarrier pipeline_barrier{};
7271
api::context()->submit_compute_job(
73-
VK_KERNEL_FROM_STR(kernel_name.str()),
72+
VK_KERNEL_FROM_STR(kernel_name),
7473
pipeline_barrier,
7574
{4, 4, 4},
7675
{4, 4, 4},
@@ -748,15 +747,14 @@ void run_from_gpu_test(
748747
vTensor vten =
749748
vTensor(api::context(), sizes, api::kFloat, storage_type, memory_layout);
750749

751-
std::stringstream kernel_name;
752-
kernel_name << "idx_fill_texture";
753-
apply_memory_layout_suffix(kernel_name, vten);
754-
apply_dtype_suffix(kernel_name, vten);
750+
std::string kernel_name("idx_fill_texture");
751+
add_memory_layout_suffix(kernel_name, vten);
752+
add_dtype_suffix(kernel_name, vten);
755753

756754
{
757755
api::PipelineBarrier pipeline_barrier{};
758756
api::context()->submit_compute_job(
759-
VK_KERNEL_FROM_STR(kernel_name.str()),
757+
VK_KERNEL_FROM_STR(kernel_name),
760758
pipeline_barrier,
761759
vten.virtual_extents(),
762760
{4, 4, 4},

0 commit comments

Comments
 (0)