@@ -33,16 +33,6 @@ void add_copy_offset_node(
33
33
add_dtype_suffix (kernel_name, *t_out);
34
34
add_storage_type_suffix (kernel_name, *t_out);
35
35
36
- const struct Block final {
37
- alignas (16 ) ivec3 range;
38
- alignas (16 ) ivec3 src_offset;
39
- alignas (16 ) ivec3 dst_offset;
40
- } offset_params{
41
- range,
42
- src_offset,
43
- dst_offset,
44
- };
45
-
46
36
auto shader = VK_KERNEL_FROM_STR (kernel_name);
47
37
48
38
graph.execute_nodes ().emplace_back (new DispatchNode (
@@ -56,11 +46,18 @@ void add_copy_offset_node(
56
46
{in, vkapi::kRead },
57
47
},
58
48
// Parameter buffers
59
- {
60
- graph.create_params_buffer (offset_params),
61
- },
49
+ {},
62
50
// Specialization Constants
63
- {graph.hashed_layout_of (out), graph.hashed_layout_of (in)}));
51
+ {graph.hashed_layout_of (out), graph.hashed_layout_of (in)},
52
+ nullptr ,
53
+ {},
54
+ {
55
+ PushConstantDataInfo (&range, sizeof (range), sizeof (utils::ivec4)),
56
+ PushConstantDataInfo (
57
+ &src_offset, sizeof (src_offset), sizeof (utils::ivec4)),
58
+ PushConstantDataInfo (
59
+ &dst_offset, sizeof (dst_offset), sizeof (utils::ivec4)),
60
+ }));
64
61
}
65
62
66
63
void add_copy_channel_offset_node (
@@ -128,28 +125,23 @@ void add_copy_channel_offset_node(
128
125
// The shader combines the global invocation id and the dst_offset to get
129
126
// the actual coordinate.
130
127
131
- ivec3 dst_offset{
128
+ const ivec3 dst_offset{
132
129
0 , 0 , dst_first_z + batch_idx * utils::div_up_4 (out_channels)};
133
130
134
- uvec3 global_size{
131
+ const uvec3 global_size{
135
132
utils::safe_downcast<uint32_t >(dim_at<kWidth4D >(in_sizes)),
136
133
utils::safe_downcast<uint32_t >(dim_at<kHeight4D >(in_sizes)),
137
134
utils::safe_downcast<uint32_t >(dst_last_z - dst_first_z + 1 )};
138
- uvec3 local_size = graph.create_local_wg_size (global_size);
139
-
140
- const struct Block final {
141
- ivec3 range;
142
- int32_t channel_range;
143
- ivec3 dst_offset;
144
- int32_t dst_channel_offset;
145
- int32_t src_channel_offset;
146
- } channel_offset_params{
147
- utils::make_ivec3 (global_size),
148
- channel_range,
149
- dst_offset,
150
- dst_channel_offset,
151
- src_channel_offset,
152
- };
135
+ const uvec3 local_size = graph.create_local_wg_size (global_size);
136
+
137
+ const utils::ivec4 range_params = {
138
+ static_cast <int >(global_size[0 ]),
139
+ static_cast <int >(global_size[1 ]),
140
+ static_cast <int >(global_size[2 ]),
141
+ channel_range};
142
+
143
+ const utils::ivec4 offset_params = {
144
+ dst_offset[0 ], dst_offset[1 ], dst_offset[2 ], dst_channel_offset};
153
145
154
146
auto shader = VK_KERNEL_FROM_STR (kernel_name);
155
147
@@ -165,13 +157,17 @@ void add_copy_channel_offset_node(
165
157
{in, vkapi::MemoryAccessType::READ},
166
158
},
167
159
// Parameter buffers
168
- {
169
- t_out->sizes_ubo (),
170
- t_in->sizes_ubo (),
171
- graph.create_params_buffer (channel_offset_params),
172
- },
160
+ {},
173
161
// Specialization Constants
174
- {graph.hashed_layout_of (out), graph.hashed_layout_of (in)}));
162
+ {graph.hashed_layout_of (out), graph.hashed_layout_of (in)},
163
+ nullptr ,
164
+ {},
165
+ {graph.sizes_pc_of (out),
166
+ graph.sizes_pc_of (in),
167
+ PushConstantDataInfo (&range_params, sizeof (range_params)),
168
+ PushConstantDataInfo (&offset_params, sizeof (offset_params)),
169
+ PushConstantDataInfo (
170
+ &src_channel_offset, sizeof (src_channel_offset))}));
175
171
}
176
172
}
177
173
0 commit comments