@@ -33,16 +33,6 @@ void add_copy_offset_node(
33
33
add_dtype_suffix (kernel_name, *t_out);
34
34
add_storage_type_suffix (kernel_name, *t_out);
35
35
36
- const struct Block final {
37
- alignas (16 ) ivec3 range;
38
- alignas (16 ) ivec3 src_offset;
39
- alignas (16 ) ivec3 dst_offset;
40
- } offset_params{
41
- range,
42
- src_offset,
43
- dst_offset,
44
- };
45
-
46
36
auto shader = VK_KERNEL_FROM_STR (kernel_name);
47
37
48
38
graph.execute_nodes ().emplace_back (new DispatchNode (
@@ -56,11 +46,16 @@ void add_copy_offset_node(
56
46
{in, vkapi::kRead },
57
47
},
58
48
// Parameter buffers
59
- {
60
- graph.create_params_buffer (offset_params),
61
- },
49
+ {},
62
50
// Specialization Constants
63
- {graph.hashed_layout_of (out), graph.hashed_layout_of (in)}));
51
+ {graph.hashed_layout_of (out), graph.hashed_layout_of (in)},
52
+ nullptr ,
53
+ {},
54
+ {
55
+ PushConstantDataInfo (&range, sizeof (utils::ivec4)),
56
+ PushConstantDataInfo (&src_offset, sizeof (utils::ivec4)),
57
+ PushConstantDataInfo (&dst_offset, sizeof (utils::ivec4)),
58
+ }));
64
59
}
65
60
66
61
void add_copy_channel_offset_node (
@@ -128,28 +123,23 @@ void add_copy_channel_offset_node(
128
123
// The shader combines the global invocation id and the dst_offset to get
129
124
// the actual coordinate.
130
125
131
- ivec3 dst_offset{
126
+ const ivec3 dst_offset{
132
127
0 , 0 , dst_first_z + batch_idx * utils::div_up_4 (out_channels)};
133
128
134
- uvec3 global_size{
129
+ const uvec3 global_size{
135
130
utils::safe_downcast<uint32_t >(dim_at<kWidth4D >(in_sizes)),
136
131
utils::safe_downcast<uint32_t >(dim_at<kHeight4D >(in_sizes)),
137
132
utils::safe_downcast<uint32_t >(dst_last_z - dst_first_z + 1 )};
138
- uvec3 local_size = graph.create_local_wg_size (global_size);
139
-
140
- const struct Block final {
141
- ivec3 range;
142
- int32_t channel_range;
143
- ivec3 dst_offset;
144
- int32_t dst_channel_offset;
145
- int32_t src_channel_offset;
146
- } channel_offset_params{
147
- utils::make_ivec3 (global_size),
148
- channel_range,
149
- dst_offset,
150
- dst_channel_offset,
151
- src_channel_offset,
152
- };
133
+ const uvec3 local_size = graph.create_local_wg_size (global_size);
134
+
135
+ const utils::ivec4 range_params = {
136
+ static_cast <int >(global_size[0 ]),
137
+ static_cast <int >(global_size[1 ]),
138
+ static_cast <int >(global_size[2 ]),
139
+ channel_range};
140
+
141
+ const utils::ivec4 offset_params = {
142
+ dst_offset[0 ], dst_offset[1 ], dst_offset[2 ], dst_channel_offset};
153
143
154
144
auto shader = VK_KERNEL_FROM_STR (kernel_name);
155
145
@@ -165,13 +155,19 @@ void add_copy_channel_offset_node(
165
155
{in, vkapi::MemoryAccessType::READ},
166
156
},
167
157
// Parameter buffers
168
- {
169
- t_out->sizes_ubo (),
170
- t_in->sizes_ubo (),
171
- graph.create_params_buffer (channel_offset_params),
172
- },
158
+ {},
173
159
// Specialization Constants
174
- {graph.hashed_layout_of (out), graph.hashed_layout_of (in)}));
160
+ {graph.hashed_layout_of (out), graph.hashed_layout_of (in)},
161
+ nullptr ,
162
+ {},
163
+ {PushConstantDataInfo (
164
+ t_out->get_uniform_data (), api::vTensor::Attribute::SIZES),
165
+ PushConstantDataInfo (
166
+ t_in->get_uniform_data (), api::vTensor::Attribute::SIZES),
167
+ PushConstantDataInfo (&range_params, sizeof (range_params)),
168
+ PushConstantDataInfo (&offset_params, sizeof (offset_params)),
169
+ PushConstantDataInfo (
170
+ &src_channel_offset, sizeof (src_channel_offset))}));
175
171
}
176
172
}
177
173
0 commit comments