Skip to content

Commit 86b818d

Browse files
trivedivivek and facebook-github-bot
authored and committed
Using width packed bias in conv1d op to slightly improve speed and memory. (#10733)
Summary: This diff changes the bias tensor packing for the conv1d op from channels-packed to width-packed, which reduces the tensor's memory footprint and reduces wasted texel fetches. Reviewed By: SS-JIA. Differential Revision: D74208485.
1 parent 1ae8c2c commit 86b818d

File tree

2 files changed

+8
-5
lines changed

2 files changed

+8
-5
lines changed

backends/vulkan/runtime/graph/ops/glsl/conv1d.glsl

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@ void main() {
8686
const int in_l = out_l * stride - padding;
8787
VEC4_T sum = VEC4_T(0);
8888

89+
const int out_c_packed_index = out_c >> 2;
90+
const int out_c_packed_lane = out_c & 0x3;
91+
8992
for (int in_c = c_start; in_c < c_end; ++in_c) {
9093
// "k" tracks the kernel's index for our input-kernel computation.
9194
// It reads out-of-bound zeros, but trying to avoid them complicates
@@ -103,16 +106,16 @@ void main() {
103106
// It is possible to further reduce the memory footprint by swapping the
104107
// dimensions, using x extent for out_channel, and y for kernel.
105108
for (int k = 0; k < kernel_size; k++) {
106-
const ivec3 w_lpos = ivec3(k, in_c % in_group_size, out_c / 4);
109+
const ivec3 w_lpos = ivec3(k, in_c % in_group_size, out_c_packed_index);
107110
const VEC4_T weight_texel = load_texel_lpos(kernel_in, w_lpos, kernel_axis_map);
108-
VEC4_T weight = VEC4_T(weight_texel[out_c % 4]);
111+
VEC4_T weight = VEC4_T(weight_texel[out_c_packed_lane]);
109112

110113
const ivec3 in_pos = lpos_to_pos(ivec3(in_l + k * dilation, in_c, N), in_axis_map);
111114
sum = fma(weight, load_texel(t_in, in_pos), sum);
112115
}
113116
}
114117

115-
const VEC4_T bias = load_texel_lpos(bias_in, ivec3(out_c, 0, 0), bias_axis_map);
118+
const VEC4_T bias = load_texel_lpos(bias_in, ivec3(out_c_packed_index, 0, 0), bias_axis_map);
116119
const ivec3 out_lpos = ivec3(out_l, out_c, N);
117-
write_texel_lpos(t_out, out_lpos, op(sum + bias.x, out_min, out_max), out_axis_map);
120+
write_texel_lpos(t_out, out_lpos, op(sum + bias[out_c_packed_lane], out_min, out_max), out_axis_map);
118121
}

backends/vulkan/runtime/graph/ops/impl/Convolution.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,7 @@ void add_conv1d_node(
483483
weight,
484484
/*transposed = */ false,
485485
/*storage_type = */ utils::kTexture3D,
486-
/*memory_layout = */ utils::kChannelsPacked);
486+
/*memory_layout = */ utils::kWidthPacked);
487487

488488
float out_min_val = 0.0f;
489489
float out_max_val = 0.0f;

0 commit comments

Comments
 (0)