Skip to content

Commit e148c1d

Browse files
SS-JIA authored and facebook-github-bot committed
vTensor cleanup 5/N - clean up indexing_utils.h and clarify function names (#5443)
Summary:

Pull Request resolved: #5443

## Context

The goal of this diff is to clean up the `indexing_utils.h` header file to introduce more consistent terminology for tensor properties and to improve the function names so that it is crystal clear what each function does.

## Notes for reviewers

As with the last diff, the majority of meaningful changes are in `indexing_utils.h`; the rest are changes to function names. There should be no functionality changes, with the exception of rewriting functions that interact with texture position so that their input arguments can be const.

ghstack-source-id: 243365712

Reviewed By: jorgep31415

Differential Revision: D62901892

fbshipit-source-id: 2e8447f4403eebbd9aa6fd0aaa3768d30b0b45c3
1 parent 2afcd96 commit e148c1d

20 files changed: +281 / -326 lines changed

backends/vulkan/runtime/graph/ops/glsl/addmm_naive_texture3d.glsl

Lines changed: 21 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ ${layout_declare_tensor(B, "r", "mat2_tensor", DTYPE, "texture3d")}
2424
$if HAS_BIAS:
2525
${layout_declare_tensor(B, "r", "bias_tensor", DTYPE, "texture3d")}
2626
${layout_declare_ubo(B, "ivec4", "out_sizes")}
27-
${layout_declare_ubo(B, "ivec3", "out_logical_limits")}
27+
${layout_declare_ubo(B, "ivec3", "out_limits")}
2828
${layout_declare_ubo(B, "ivec4", "out_axis_map")}
2929
${layout_declare_ubo(B, "ivec4", "mat1_sizes")}
3030
${layout_declare_ubo(B, "ivec4", "mat1_axis_map")}
@@ -63,11 +63,11 @@ vec4 get_bias_texel_W_packed(ivec3 logical_pos) {
6363
}
6464
#endif // HAS_BIAS
6565

66-
vec4 matmul_naive_k_dim_packed(const ivec3 out_mpos) {
66+
vec4 matmul_naive_k_dim_packed(const ivec3 out_lpos) {
6767
ivec3 mat1_pos;
6868
mat1_pos[mat1_axis_map.x] = 0;
69-
mat1_pos[mat1_axis_map.y] = out_mpos.y;
70-
mat1_pos[mat1_axis_map.z] = out_mpos.z;
69+
mat1_pos[mat1_axis_map.y] = out_lpos.y;
70+
mat1_pos[mat1_axis_map.z] = out_lpos.z;
7171
#ifdef MAT2_IS_TRANSPOSED
7272
const int mat2_k_axis = mat2_axis_map.x;
7373
const int mat2_row_axis = mat2_axis_map.y;
@@ -88,9 +88,9 @@ vec4 matmul_naive_k_dim_packed(const ivec3 out_mpos) {
8888
// latency. Surprisingly, this doesn't translate to mat1_pos.
8989
ivec3 mat2_pos = ivec3(0);
9090
mat2_pos[mat2_k_axis] = i;
91-
mat2_pos[mat2_row_axis] = out_mpos.x * 4 + r;
91+
mat2_pos[mat2_row_axis] = out_lpos.x * 4 + r;
9292
#ifndef MAT2_IS_TRANSPOSED
93-
mat2_pos[mat2_axis_map.z] = out_mpos.z;
93+
mat2_pos[mat2_axis_map.z] = out_lpos.z;
9494
#endif // MAT2_IS_TRANSPOSED
9595
sums[r] = dot(mat1_tex, texelFetch(mat2_tensor, mat2_pos, 0));
9696
}
@@ -103,16 +103,16 @@ vec4 matmul_naive_k_dim_packed(const ivec3 out_mpos) {
103103
return texel;
104104
}
105105

106-
vec4 matmul_naive_k_dim_packed_row_dim_packed(const ivec3 out_mpos) {
106+
vec4 matmul_naive_k_dim_packed_row_dim_packed(const ivec3 out_lpos) {
107107
ivec3 mat1_pos;
108108
mat1_pos[mat1_axis_map.x] = 0;
109-
mat1_pos[mat1_axis_map.y] = out_mpos.y;
110-
mat1_pos[mat1_axis_map.z] = out_mpos.z;
109+
mat1_pos[mat1_axis_map.y] = out_lpos.y;
110+
mat1_pos[mat1_axis_map.z] = out_lpos.z;
111111

112112
ivec3 mat2_pos;
113-
mat2_pos[mat2_axis_map.x] = out_mpos.x;
113+
mat2_pos[mat2_axis_map.x] = out_lpos.x;
114114
mat2_pos[mat2_axis_map.y] = 0;
115-
mat2_pos[mat2_axis_map.z] = out_mpos.z;
115+
mat2_pos[mat2_axis_map.z] = out_lpos.z;
116116

117117
ivec3 mat2_pos_offset = ivec3(0);
118118
mat2_pos_offset[mat2_axis_map.y] = 1;
@@ -131,9 +131,9 @@ vec4 matmul_naive_k_dim_packed_row_dim_packed(const ivec3 out_mpos) {
131131
// On-demand construction of mat2_pos appears to provide the lowest
132132
// latency. Surprisingly, this doesn't translate to mat1_pos.
133133
ivec3 mat2_pos = ivec3(0);
134-
mat2_pos[mat2_axis_map.x] = out_mpos.x;
134+
mat2_pos[mat2_axis_map.x] = out_lpos.x;
135135
mat2_pos[mat2_axis_map.y] = 4 * i + r;
136-
mat2_pos[mat2_axis_map.z] = out_mpos.z;
136+
mat2_pos[mat2_axis_map.z] = out_lpos.z;
137137

138138
vec4 mat1_comp_vec = vec4(mat1_tex[r]);
139139
texel = fma(mat1_comp_vec, texelFetch(mat2_tensor, mat2_pos, 0), texel);
@@ -144,33 +144,31 @@ vec4 matmul_naive_k_dim_packed_row_dim_packed(const ivec3 out_mpos) {
144144
}
145145

146146
void main() {
147-
const ivec3 out_mpos = ivec3(gl_GlobalInvocationID);
148-
if (any(greaterThanEqual(out_mpos, out_logical_limits))) {
147+
const ivec3 out_lpos = ivec3(gl_GlobalInvocationID);
148+
if (any(greaterThanEqual(out_lpos, out_limits))) {
149149
return;
150150
}
151151

152152
vec4 texel = vec4(0);
153153

154154
#ifdef MAT2_IS_TRANSPOSED
155155
if (mat2_packed_dim == W_DIM) {
156-
texel = matmul_naive_k_dim_packed(out_mpos);
156+
texel = matmul_naive_k_dim_packed(out_lpos);
157157
} else {
158-
texel = matmul_naive_k_dim_packed_row_dim_packed(out_mpos);
158+
texel = matmul_naive_k_dim_packed_row_dim_packed(out_lpos);
159159
}
160160
#else
161161
if (mat2_packed_dim == W_DIM) {
162-
texel = matmul_naive_k_dim_packed_row_dim_packed(out_mpos);
162+
texel = matmul_naive_k_dim_packed_row_dim_packed(out_lpos);
163163
} else {
164-
texel = matmul_naive_k_dim_packed(out_mpos);
164+
texel = matmul_naive_k_dim_packed(out_lpos);
165165
}
166166
#endif // MAT2_IS_TRANSPOSED
167167

168168
#ifdef HAS_BIAS
169-
vec4 bias_texel = get_bias_texel_W_packed(out_mpos);
169+
vec4 bias_texel = get_bias_texel_W_packed(out_lpos);
170170
texel = beta * bias_texel + alpha * texel;
171171
#endif // HAS_BIAS
172172

173-
ivec3 out_pos = to_texture_pos(out_mpos, out_axis_map);
174-
175-
imageStore(out_tensor, out_pos, texel);
173+
imageStore(out_tensor, lpos_to_pos(out_lpos, out_axis_map), texel);
176174
}

backends/vulkan/runtime/graph/ops/glsl/addmm_optimized.glsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -158,7 +158,7 @@ FloatMatrix matmul_partial(const ivec4 out_idx_tl) {
158158
//
159159

160160
void write_results_C_packed(const ivec4 out_idx_tl, FloatMatrix results) {
161-
ivec3 out_pos = to_texture_pos(
161+
ivec3 out_pos = tidx_to_pos(
162162
out_idx_tl, out_sizes, out_axis_map, out_packed_dim);
163163

164164
for (int tile_c = 0;

backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -36,28 +36,26 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
3636
layout(constant_id = 3) const int packed_dim = C_DIM;
3737

3838
void main() {
39-
// pos is physical (x, y, z), as global workgroup uses image extents
40-
const ivec3 pos = ivec3(gl_GlobalInvocationID);
41-
// physical pos (x, y, z) -> logical (w, c, h, n) output
42-
const ivec4 idx = to_tensor_idx(pos, out_sizes, out_axis_map, packed_dim);
39+
const ivec3 lpos = ivec3(gl_GlobalInvocationID);
40+
const ivec4 tidx = lpos_to_tidx(lpos, out_sizes, out_axis_map.w, packed_dim);
4341

44-
if (any(greaterThanEqual(idx, out_sizes))) {
42+
if (any(greaterThanEqual(tidx, out_sizes))) {
4543
return;
4644
}
4745

4846
// broadcast on logical sizes
49-
ivec4 in_idx = broadcast_indices(idx, in_sizes);
47+
ivec4 in_idx = broadcast_indices(tidx, in_sizes);
5048
VEC4_T in_texel = VEC4_T(load_texel(
5149
t_in,
5250
// read axis mapped texel
53-
to_texture_pos(in_idx, in_sizes, in_axis_map, packed_dim)));
51+
tidx_to_pos(in_idx, in_sizes, in_axis_map, packed_dim)));
5452

5553
// broadcast on logical sizes
56-
ivec4 other_idx = broadcast_indices(idx, other_sizes);
54+
ivec4 other_idx = broadcast_indices(tidx, other_sizes);
5755
VEC4_T other_texel = VEC4_T(load_texel(
5856
t_other,
5957
// read axis mapped texel
60-
to_texture_pos(other_idx, other_sizes, other_axis_map, packed_dim)));
58+
tidx_to_pos(other_idx, other_sizes, other_axis_map, packed_dim)));
6159

6260
// Check boolean broadcast flags; we use ivec2 instead of bvec2 for alignment.
6361
if (broadcast_params.x > 0) {
@@ -68,6 +66,6 @@ void main() {
6866
}
6967

7068
imageStore(t_out,
71-
to_texture_pos(idx, out_sizes, out_axis_map, packed_dim),
69+
tidx_to_pos(tidx, out_sizes, out_axis_map, packed_dim),
7270
VEC4_T(op(in_texel, other_texel, alpha)));
7371
}

backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,13 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
2323
layout(constant_id = 3) const int UNUSED_packed_dim = W_DIM;
2424

2525
void main() {
26-
int out_id = int(gl_GlobalInvocationID.x);
27-
if (out_id >= numel) {
26+
int nchwi = int(gl_GlobalInvocationID.x);
27+
if (nchwi >= numel) {
2828
return;
2929
}
3030

31-
ivec4 t_in_idx = from_nchw_buffer_i(out_id, in_sizes);
32-
const int in_id = to_buffer_id(t_in_idx, in_strides);
31+
ivec4 in_tidx = nchwi_to_tidx(nchwi, in_sizes);
32+
const int in_bufi = tidx_to_bufi(in_tidx, in_strides);
3333

34-
nchw_buf[out_id] = t_in[in_id];
34+
nchw_buf[nchwi] = t_in[in_bufi];
3535
}

backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_prepack_weights.glsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ void main() {
5353
}
5454

5555
// Map tensor_idx to normal buffer_i
56-
const ivec4 p0 = get_texel_nchw_buffer_ixs(idx, sizes, packed_dim);
56+
const ivec4 p0 = tidx_to_nchwi(idx, sizes, packed_dim);
5757

5858
// Compute modified tensor_idx by inverting the CPU function
5959
const int N = original_sizes.w;

backends/vulkan/runtime/graph/ops/glsl/conv2d_prepack_weights.glsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ void main() {
5353
}
5454

5555
// Map tensor_idx to normal buffer_i
56-
const ivec4 p0 = get_texel_nchw_buffer_ixs(idx, sizes, packed_dim);
56+
const ivec4 p0 = tidx_to_nchwi(idx, sizes, packed_dim);
5757

5858
// Compute modified tensor_idx by inverting the CPU function
5959
const int N = original_sizes.w;

backends/vulkan/runtime/graph/ops/glsl/conv_transpose2d_prepack_weights.glsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ void main() {
5353
}
5454

5555
// Map tensor_idx to normal buffer_i
56-
const ivec4 p0 = get_texel_nchw_buffer_ixs(idx, sizes, packed_dim);
56+
const ivec4 p0 = tidx_to_nchwi(idx, sizes, packed_dim);
5757

5858
// Compute modified tensor_idx by inverting the CPU function
5959
const int N = original_sizes.w;

backends/vulkan/runtime/graph/ops/glsl/embedding.glsl

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -29,23 +29,23 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
2929
layout(constant_id = 3) const int packed_dim = C_DIM;
3030

3131
void main() {
32-
const ivec3 out_pos = ivec3(gl_GlobalInvocationID);
33-
const ivec4 out_idx = to_tensor_idx(out_pos, sizes, out_axis_map, packed_dim);
34-
if (any(greaterThanEqual(out_idx, sizes))) {
32+
const ivec3 out_lpos = ivec3(gl_GlobalInvocationID);
33+
const ivec4 out_tidx = lpos_to_tidx(out_lpos, sizes, out_axis_map.w, packed_dim);
34+
if (any(greaterThanEqual(out_tidx, sizes))) {
3535
return;
3636
}
3737
VEC4_T out_texel;
3838

3939
// Consider optimizing via W-packing format for t_in and t_weight.
4040
for (int i = 0; i < 4; ++i) {
4141
// Read input tensor for embedding index.
42-
const ivec3 in_pos = to_texture_pos(ivec3(out_idx.y, out_idx.z * 4 + i, out_idx.w / 4), in_axis_map);
43-
const int in_texel_elem = load_texel(t_in, in_pos)[out_idx.w % 4];
42+
const ivec3 in_pos = lpos_to_pos(ivec3(out_tidx.y, out_tidx.z * 4 + i, out_tidx.w / 4), in_axis_map);
43+
const int in_texel_elem = load_texel(t_in, in_pos)[out_tidx.w % 4];
4444

4545
// Read weight tensor for embedding.
46-
const ivec3 weight_pos = to_texture_pos(ivec3(out_idx.x, in_texel_elem, 0), weight_axis_map);
46+
const ivec3 weight_pos = lpos_to_pos(ivec3(out_tidx.x, in_texel_elem, 0), weight_axis_map);
4747
out_texel[i] = load_texel(t_weight, weight_pos).x;
4848
}
4949

50-
imageStore(t_out, out_pos, out_texel);
50+
imageStore(t_out, lpos_to_pos(out_lpos, out_axis_map), out_texel);
5151
}

backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.glsl

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
3131
layout(constant_id = 3) const int packed_dim = C_DIM;
3232

3333
void write_out_texel(VEC4_T texel, ivec4 tensor_idx) {
34-
const ivec4 buf_indices = get_texel_nchw_buffer_ixs(
34+
const ivec4 buf_indices = tidx_to_nchwi(
3535
tensor_idx,
3636
sizes,
3737
packed_dim);
@@ -51,13 +51,13 @@ void write_out_texel(VEC4_T texel, ivec4 tensor_idx) {
5151
}
5252

5353
void main() {
54-
const ivec3 pos = ivec3(gl_GlobalInvocationID);
55-
const ivec4 tensor_idx = to_tensor_idx(pos, sizes, axis_map, packed_dim);
54+
const ivec3 lpos = ivec3(gl_GlobalInvocationID);
55+
const ivec4 tidx = lpos_to_tidx(lpos, sizes, axis_map.w, packed_dim);
5656

57-
if (any(greaterThanEqual(tensor_idx, sizes))) {
57+
if (any(greaterThanEqual(tidx, sizes))) {
5858
return;
5959
}
6060

61-
const VEC4_T intex = load_texel(t_in, pos);
62-
write_out_texel(intex, tensor_idx);
61+
const VEC4_T intex = load_texel(t_in, lpos_to_pos(lpos, axis_map));
62+
write_out_texel(intex, tidx);
6363
}

backends/vulkan/runtime/graph/ops/glsl/index_select_channel.glsl

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -34,18 +34,18 @@ void main() {
3434
}
3535

3636
const ivec4 idx = to_tensor_idx(out_pos, out_sizes, packed_dim);
37-
const ivec4 buffer_ixs = get_texel_nchw_buffer_ixs(idx, out_sizes, packed_dim);
37+
const ivec4 buffer_ixs = tidx_to_nchwi(idx, out_sizes, packed_dim);
3838

3939
VEC4_T out_texel;
4040
for (int i = 0; i < 4; ++i) {
41-
const ivec4 out_idx = from_nchw_buffer_i(buffer_ixs[i], out_sizes);
42-
int out_channel = out_idx.z;
41+
const ivec4 out_tidx = nchwi_to_tidx(buffer_ixs[i], out_sizes);
42+
int out_channel = out_tidx.z;
4343
int in_channel = texelFetch(t_idx, ivec3(out_channel, 0, 0), 0).x;
4444

45-
ivec4 in_idx = out_idx;
46-
in_idx.z = in_channel;
45+
ivec4 in_tidx = out_tidx;
46+
in_tidx.z = in_channel;
4747

48-
ivec4 in_elem_pos = to_texture_elem_pos(in_idx, in_sizes, packed_dim);
48+
ivec4 in_elem_pos = to_texture_elem_pos(in_tidx, in_sizes, packed_dim);
4949

5050
VEC4_T in_texel = texelFetch(t_in, in_elem_pos.xyz, 0);
5151

0 commit comments

Comments
 (0)