Skip to content

Commit a82b823

Browse files
[ET-VK] Performance improvement to few indexing functions. (#9171)
This PR was created by the merge bot to help merge the original PR into the main branch.
ghstack PR number: #9138 by @trivedivivek
^ Please use this as the source of truth for the PR details, comments, and reviews
ghstack PR base: https://github.com/pytorch/executorch/tree/gh/trivedivivek/61/base
ghstack PR head: https://github.com/pytorch/executorch/tree/gh/trivedivivek/61/head
Merge bot PR base: https://github.com/pytorch/executorch/tree/gh/trivedivivek/60/orig
Merge bot PR head: https://github.com/pytorch/executorch/tree/gh/trivedivivek/61/orig
@diff-train-skip-merge
---------
Co-authored-by: Vivek Trivedi <[email protected]>
1 parent 1462960 commit a82b823

File tree

1 file changed

+10
-7
lines changed

1 file changed

+10
-7
lines changed

backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -104,16 +104,19 @@ ivec4 tidx_to_4bufi(
104104
}
105105

106106
ivec4 nchwi_to_tidx(const int nchwi, const ivec4 sizes) {
107+
const int nchwi_div_x = nchwi / sizes.x;
108+
const int nchwi_div_y = nchwi_div_x / sizes.y;
107109
return ivec4(
108110
nchwi % sizes.x,
109-
(nchwi / (sizes.x)) % sizes.y,
110-
(nchwi / (sizes.x * sizes.y)) % sizes.z,
111-
(nchwi / (sizes.x * sizes.y * sizes.z)));
111+
nchwi_div_x % sizes.y,
112+
nchwi_div_y % sizes.z,
113+
nchwi_div_y / sizes.z);
112114
}
113115

114116
int tidx_to_nchwi(const ivec4 tidx, const ivec4 sizes) {
115-
return tidx.w * sizes.x * sizes.y * sizes.z + tidx.z * sizes.x * sizes.y +
116-
tidx.y * sizes.x + tidx.x;
117+
const int sizes_xy = sizes.x * sizes.y;
118+
return tidx.w * sizes_xy * sizes.z + tidx.z * sizes_xy + tidx.y * sizes.x +
119+
tidx.x;
117120
}
118121

119122
// TODO(ssjia): make this function use dim order so that it can work with any
@@ -360,8 +363,8 @@ ivec4 to_texture_elem_pos(ivec4 idx, ivec4 sizes, int packed_dim) {
360363
// pos[3] (i.e. pos.w) is set to a placeholder value
361364
ivec4 pos = idx.xyzx;
362365
pos[BATCH_AXIS] += idx.w * sizes[BATCH_AXIS];
363-
pos[packed_dim] /= 4;
364-
pos.w = idx[packed_dim] % 4;
366+
pos[packed_dim] >>= 2;
367+
pos.w = idx[packed_dim] & 0x3;
365368
return pos;
366369
}
367370

0 commit comments

Comments
 (0)