Skip to content

Commit b4ed969

Browse files
committed
Update on "[ET-VK][6/n] aten.view_copy"
aten.view_copy, supporting all packing. Using ssjia's idea to do a direct lookup. Differential Revision: [D56281400](https://our.internmc.facebook.com/intern/diff/D56281400/) [ghstack-poisoned]
1 parent 27c90de commit b4ed969

File tree

6 files changed

+42
-55
lines changed

6 files changed

+42
-55
lines changed

backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,18 @@
99
#define divup4(x) ((x + 3) / 4)
1010

1111
// Input: idx is an ivec4 user-level coordinate, sizes is the tensor shape
12-
// Output: buffer_idx in the continous nchw-buffer.
13-
#define to_buffer_i(idx, sizes) \
14-
(idx.x + idx.y* sizes.x + idx.z* sizes.y* sizes.x + \
15-
idx.w* sizes.z* sizes.y* sizes.x)
12+
// Output: buffer_idx in the continuous nchw-buffer.
13+
#define to_buffer_i(idx, sizes) \
14+
(idx.x + idx.y * sizes.x + idx.z * sizes.y * sizes.x + \
15+
idx.w * sizes.z * sizes.y * sizes.x)
1616

1717
// Inverse of to_buffer_i
18-
// Input: buffer_idx in the continous nchw-buffer, sizes is the tensor shape
18+
// Input: buffer_idx in the continuous nchw-buffer, sizes is the tensor shape
1919
// Output: ivec4 user-level coordinate
20-
#define from_buffer_i(buf_i, sizes) \
21-
ivec4( \
22-
buf_i % sizes.x, \
23-
(buf_i / (sizes.x)) % sizes.y, \
20+
#define from_buffer_i(buf_i, sizes) \
21+
ivec4( \
22+
buf_i % sizes.x, \
23+
(buf_i / (sizes.x)) % sizes.y, \
2424
(buf_i / (sizes.x * sizes.y)) % sizes.z, \
2525
(buf_i / (sizes.x * sizes.y * sizes.z)))
2626

backends/vulkan/runtime/graph/ops/glsl/view.glsl

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,50 +26,46 @@ layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
2626
#define get_packed_stride get_packed_stride_${PACKING}
2727

2828
layout(set = 0, binding = 2) uniform PRECISION restrict OutGpuSizes {
29-
uvec4 data;
30-
}
31-
out_gpu_sizes;
29+
uvec4 out_gpu_sizes;
30+
};
3231

3332
layout(set = 0, binding = 3) uniform PRECISION restrict OutCpuSizes {
34-
uvec4 data;
35-
}
36-
out_cpu_sizes;
33+
uvec4 out_cpu_sizes;
34+
};
3735

3836
layout(set = 0, binding = 4) uniform PRECISION restrict InGpuSizes {
39-
uvec4 data;
40-
}
41-
in_gpu_sizes;
37+
uvec4 in_gpu_sizes;
38+
};
4239

4340
layout(set = 0, binding = 5) uniform PRECISION restrict InCpuSizes {
44-
uvec4 data;
45-
}
46-
in_cpu_sizes;
41+
uvec4 in_cpu_sizes;
42+
};
4743

4844
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
4945

5046

5147
void main() {
5248
const ivec3 out_pos = ivec3(gl_GlobalInvocationID);
53-
const ivec4 out_tensor_idx = to_tensor_idx(out_pos, out_gpu_sizes.data);
49+
const ivec4 out_tensor_idx = to_tensor_idx(out_pos, out_gpu_sizes);
5450

55-
if (all(greaterThanEqual(out_tensor_idx, out_gpu_sizes.data))) {
51+
if (all(greaterThanEqual(out_tensor_idx, out_gpu_sizes))) {
5652
return;
5753
}
5854

5955
// Assume there is a virtual continuous buffer in nchw format. From the output
6056
// pos, we first calculate the index in the virtual buffer, and then calculate
6157
// the input position from the index.
6258

63-
const uint base_index = to_buffer_i(out_tensor_idx, out_cpu_sizes.data);
59+
const uint base_index = to_buffer_i(out_tensor_idx, out_cpu_sizes);
6460
const uvec4 buf_indices =
65-
base_index + ivec4(0, 1, 2, 3) * get_packed_stride(out_cpu_sizes.data);
61+
base_index + ivec4(0, 1, 2, 3) * get_packed_stride(out_cpu_sizes);
6662

6763
VEC4_T value;
6864
// Need to look up the 4 values in the output texel separately.
6965
for (int i=0; i<4; i++) {
70-
ivec4 user_coor = from_buffer_i(buf_indices[i], in_cpu_sizes.data);
66+
ivec4 user_coor = from_buffer_i(buf_indices[i], in_cpu_sizes);
7167

72-
ivec4 in_pos_elem = to_texture_pos_elem(user_coor, in_gpu_sizes.data);
68+
ivec4 in_pos_elem = to_texture_pos_elem(user_coor, in_gpu_sizes);
7369

7470
VEC4_T intex = VEC4_T(texelFetch(image_in, in_pos_elem.xyz, 0));
7571

backends/vulkan/runtime/graph/ops/glsl/view.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,3 @@ view:
1212
- VALUE: H_packed
1313
shader_variants:
1414
- NAME: view
15-

backends/vulkan/runtime/graph/ops/impl/View.cpp

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,30 +8,22 @@
88

99
#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>
1010

11-
#include <executorch/backends/vulkan/runtime/api/api.h>
12-
1311
#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h>
1412
#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>
1513
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>
1614

1715
namespace vkcompute {
1816

19-
void add_view_node(
20-
ComputeGraph& graph,
21-
ValueRef in,
22-
ValueRef size_ref,
23-
ValueRef out) {
24-
// Note: size_ref is not used here. Since the output tensor's size have been
25-
// determined during compilation.
17+
void add_view_node(ComputeGraph& graph, ValueRef in, ValueRef out) {
2618
vTensorPtr t_in = graph.get_tensor(in);
2719
vTensorPtr t_out = graph.get_tensor(out);
28-
20+
2921
std::string kernel_name = "view";
3022
kernel_name.reserve(kShaderNameReserve);
3123
add_dtype_suffix(kernel_name, *t_out);
3224
add_memory_layout_suffix(kernel_name, *t_out);
33-
34-
api::utils::uvec3 global_size = t_out->virtual_extents();
25+
26+
api::utils::uvec3 global_size = t_out->extents();
3527
api::utils::uvec3 local_size = adaptive_work_group_size(global_size);
3628

3729
graph.execute_nodes().emplace_back(new ExecuteNode(
@@ -40,22 +32,20 @@ void add_view_node(
4032
global_size,
4133
local_size,
4234
{{out, api::MemoryAccessType::WRITE}, {in, api::MemoryAccessType::READ}},
43-
{
44-
t_out->gpu_sizes_ubo(),
45-
t_out->cpu_sizes_ubo(),
46-
t_in->gpu_sizes_ubo(),
47-
t_in->cpu_sizes_ubo()}));
35+
{t_out->gpu_sizes_ubo(),
36+
t_out->cpu_sizes_ubo(),
37+
t_in->gpu_sizes_ubo(),
38+
t_in->cpu_sizes_ubo()}));
4839
}
4940

50-
5141
void view(ComputeGraph& graph, const std::vector<ValueRef>& args) {
52-
return add_view_node(graph, args[0], args[1], args[2]);
42+
// Note: The second argument size_ref is not used here. Since the output
43+
// tensor's size have been determined during compilation.
44+
return add_view_node(graph, args[0], args[2]);
5345
}
5446

5547
REGISTER_OPERATORS {
5648
VK_REGISTER_OP(aten.view_copy.default, view);
5749
}
5850

59-
60-
} // namespace vkcompute
61-
51+
} // namespace vkcompute

backends/vulkan/test/op_tests/cases.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,9 +197,9 @@ def get_permute_inputs():
197197
def get_view_inputs():
198198
test_suite = VkTestSuite(
199199
[
200-
((3,4,5), [1, 1, -1]),
201-
((3,4,5), [1, -1, 1]),
202-
((3,4,5), [-1, 1, 1]),
200+
((3, 4, 5), [1, 1, -1]),
201+
((3, 4, 5), [1, -1, 1]),
202+
((3, 4, 5), [-1, 1, 1]),
203203
((8, 7, 2, 3), [4, 3, 7, 4]),
204204
((8, 7, 2, 3), [7, -1, 2, 1]),
205205
((8, 7, 2, 3), [1, 1, 1, -1]),

backends/vulkan/test/op_tests/utils/codegen_base.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,13 +106,15 @@ def gen_case_name(self, inputs: List[Any], prepack: bool = False) -> str:
106106
name_str += str(size) + "x"
107107
name_str = name_str[:-1]
108108
# minus sign is an invalid char for test case. change to "n".
109-
name_str = name_str.replace('-', 'n')
109+
name_str = name_str.replace("-", "n")
110+
110111
elif isinstance(arg_sizes_or_val, list):
111112
for size in arg_sizes_or_val:
112113
name_str += str(size) + "c"
113114
name_str = name_str[:-1]
114115
# minus sign is an invalid char for test case. change to "n".
115-
name_str = name_str.replace('-', 'n')
116+
name_str = name_str.replace("-", "n")
117+
116118
else:
117119
name_str += str(arg_sizes_or_val).replace(".", "p")
118120
return name_str

0 commit comments

Comments
 (0)