@@ -13,80 +13,6 @@ namespace vkcompute {
 
 namespace {
 
-/*
- * Calculates the strides of a contiguous tensor. empty_tensor_restride from
- * TensorImpl.h was used as a reference.
- */
-std::vector<int64_t> calc_contiguous_strides(
-    const std::vector<int64_t>& sizes) {
-  int64_t ndim = static_cast<int64_t>(sizes.size());
-  std::vector<int64_t> strides(ndim);
-
-  int64_t running_product = 1;
-  if (ndim >= 1) {
-    strides.at(ndim - 1) = running_product;
-    for (int i = static_cast<int>(sizes.size()) - 2; i >= 0; --i) {
-      running_product *= sizes.at(i + 1);
-      strides.at(i) = running_product;
-    }
-  }
-
-  return strides;
-}
-
-std::vector<int64_t> calc_channels_last_strides(
-    const std::vector<int64_t>& sizes) {
-  std::vector<int64_t> strides(sizes.size());
-
-  switch (sizes.size()) {
-    case 4:
-      strides.at(1) = 1;
-      strides.at(3) = sizes.at(1);
-      strides.at(2) = strides.at(3) * sizes.at(3);
-      strides.at(0) = strides.at(2) * sizes.at(2);
-      return strides;
-    case 3:
-      strides.at(0) = 1;
-      strides.at(2) = sizes.at(0);
-      strides.at(1) = strides.at(2) * sizes.at(2);
-      return strides;
-    default:
-      VK_THROW("ChannelsLast format only available for 3 <= ndim <= 4!");
-  }
-
-  return strides;
-}
-
-/*
- * Calculates the strides of a tensor based on the sizes and memory format. Note
- * that strides are only valid for vTensors that are backed by buffer storage;
- * if texture storage is used then the strides are invalid and set to zeros.
- */
-std::vector<int64_t> calc_strides(
-    const std::vector<int64_t>& sizes,
-    const api::GPUMemoryLayout memory_layout,
-    const api::StorageType storage_type) {
-  switch (storage_type) {
-    case api::kBuffer:
-      switch (memory_layout) {
-        case api::kWidthPacked:
-          return calc_contiguous_strides(sizes);
-          break;
-        case api::kChannelsPacked:
-          return calc_channels_last_strides(sizes);
-          break;
-        default:
-          VK_THROW("Invalid memory format used to create vTensor!");
-      }
-      break;
-    case api::kTexture3D:
-    case api::kTexture2D:
-      return std::vector<int64_t>(sizes.size());
-    default:
-      VK_THROW("Invalid storage type used to create vTensor!");
-  }
-}
-
 /*
  * When stored on the GPU, one dimension will be aligned to the next multiple of
  * 4 in order to take advantage of vec4 data types. The dimension that is
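
Note: the removed helpers above encode two stride rules (contiguous and channels-last). As a quick reference, the following standalone sketch reproduces those rules outside the file; the function names and the 2x3x4x5 example size are illustrative, not part of the diff:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Contiguous (row-major) strides: stride[i] = product of sizes[i+1..].
std::vector<int64_t> contiguous_strides(const std::vector<int64_t>& sizes) {
  std::vector<int64_t> strides(sizes.size(), 1);
  for (int i = static_cast<int>(sizes.size()) - 2; i >= 0; --i) {
    strides[i] = strides[i + 1] * sizes[i + 1];
  }
  return strides;
}

// Channels-last strides for a 4-D NCHW size vector: the channel dim is innermost.
std::vector<int64_t> channels_last_strides(const std::vector<int64_t>& sizes) {
  std::vector<int64_t> strides(4);
  strides[1] = 1;
  strides[3] = sizes[1];
  strides[2] = strides[3] * sizes[3];
  strides[0] = strides[2] * sizes[2];
  return strides;
}

int main() {
  const std::vector<int64_t> sizes = {2, 3, 4, 5}; // N, C, H, W
  for (int64_t s : contiguous_strides(sizes)) std::cout << s << ' ';    // 60 20 5 1
  std::cout << '\n';
  for (int64_t s : channels_last_strides(sizes)) std::cout << s << ' '; // 60 1 15 3
  std::cout << '\n';
}
```

For {2, 3, 4, 5} this prints 60 20 5 1 in the contiguous case and 60 1 15 3 in the channels-last case, i.e. the channel dimension becomes the fastest-moving one.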
@@ -176,11 +102,11 @@ api::utils::uvec3 create_image_extents(
 
   switch (memory_layout) {
     case api::kWidthPacked:
-      VK_CHECK_COND(width % 4 == 0, "Channels must be divisible by 4!");
+      VK_CHECK_COND(width % 4 == 0, "Width must be divisible by 4!");
       width /= 4;
       break;
     case api::kHeightPacked:
-      VK_CHECK_COND(height % 4 == 0, "Channels must be divisible by 4!");
+      VK_CHECK_COND(height % 4 == 0, "Height must be divisible by 4!");
       height /= 4;
       break;
     case api::kChannelsPacked:
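
Note: the corrected messages above now name the dimension the check actually guards. Per the comment preserved in the first hunk, the GPU-side sizes are padded so the packed dimension is a multiple of 4, and `create_image_extents` then folds that dimension into vec4 texels. A minimal sketch of that arithmetic, where `align_up4` is an assumed helper rather than anything from this file:

```cpp
#include <cstdint>
#include <iostream>

// Illustrative only: pad a dimension up to a multiple of 4, then fold it into
// vec4 texels, mirroring the divisibility check in create_image_extents.
int64_t align_up4(int64_t n) { return (n + 3) / 4 * 4; }

int main() {
  int64_t width = 10;                    // original size along the packed dim
  int64_t gpu_width = align_up4(width);  // 12, guaranteed divisible by 4
  std::cout << gpu_width / 4 << '\n';    // 3 texels along the packed axis
}
```

An original width of 10 is padded to a GPU width of 12 and occupies 3 texels along the packed axis.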
@@ -212,23 +138,19 @@ vTensor::vTensor(
       memory_layout_(memory_layout),
       // Calculate sizes and strides
       sizes_(sizes.begin(), sizes.end()),
-      strides_{calc_strides(sizes, memory_layout_, storage_type)},
       gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
-      gpu_strides_{calc_strides(gpu_sizes_, memory_layout_, storage_type)},
-      virtual_extents_(
-          create_image_extents(gpu_sizes_, storage_type, memory_layout)),
       // Utility Uniform Buffers that can be passed to shaders as arguments
       cpu_sizes_uniform_(nullptr),
       gpu_sizes_uniform_(nullptr),
       extents_uniform_(nullptr),
       // Construct Tensor storage
-      view_(std::make_shared<vTensorStorage>(
+      storage_(
           context,
           storage_type,
           memory_layout_,
           gpu_sizes_,
           dtype_,
-          allocate_memory)) {
+          allocate_memory) {
   if (dtype == api::kHalf) {
     VK_CHECK_COND(
         api::context()->adapter_ptr()->has_16bit_storage(),
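
Note: this hunk (and the member accesses rewritten below) replaces the shared `view_` pointer to `vTensorStorage` with a directly owned `storage_` member constructed in the initializer list. The following self-contained sketch illustrates the ownership difference; `Storage`, `TensorBefore`, and `TensorAfter` are stand-ins, not the real classes:

```cpp
#include <memory>

// Stand-in for vTensorStorage; only here to make the example compile.
struct Storage {
  explicit Storage(int size) : size(size) {}
  int size;
};

// Before: storage held behind a shared_ptr, allocated on the heap.
struct TensorBefore {
  explicit TensorBefore(int size) : view_(std::make_shared<Storage>(size)) {}
  std::shared_ptr<Storage> view_;
};

// After: storage owned by value and constructed directly in the init list.
struct TensorAfter {
  explicit TensorAfter(int size) : storage_(size) {}
  Storage storage_;
};

int main() {
  TensorBefore a(4);
  TensorAfter b(4);
  return a.view_->size - b.storage_.size; // 0: same data, different ownership
}
```

Owning the storage by value avoids the extra heap allocation and shared ownership, which is consistent with the `view_->` to `storage_.` rewrites throughout the rest of the diff.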
@@ -237,93 +159,60 @@ vTensor::vTensor(
   }
 }
 
-vTensor::vTensor(
-    api::Context* const context,
-    const std::vector<int64_t>& sizes,
-    double q_scale,
-    int64_t q_zero_point,
-    const api::ScalarType dtype,
-    const api::StorageType storage_type,
-    const api::GPUMemoryLayout memory_layout)
-    : dtype_(dtype),
-      memory_layout_(memory_layout),
-      // Calculate sizes and strides
-      sizes_(sizes.begin(), sizes.end()),
-      strides_{calc_strides(sizes, memory_layout_, storage_type)},
-      gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
-      gpu_strides_{calc_strides(gpu_sizes_, memory_layout_, storage_type)},
-      virtual_extents_(
-          create_image_extents(gpu_sizes_, storage_type, memory_layout)),
-      // Vulkan uniform buffer containing sizes and stride info
-      cpu_sizes_uniform_(nullptr),
-      gpu_sizes_uniform_(nullptr),
-      extents_uniform_(nullptr),
-      // Quantization params
-      is_quantized_{true},
-      q_scale_{q_scale},
-      q_zero_point_{q_zero_point},
-      // Construct Tensor storage
-      view_(std::make_shared<vTensorStorage>(
-          context,
-          storage_type,
-          memory_layout_,
-          gpu_sizes_,
-          dtype_)) {}
-
 api::VulkanImage& vTensor::image(
     api::PipelineBarrier& pipeline_barrier,
-    const api::PipelineStageFlags stage) const& {
-  view_->transition(pipeline_barrier, stage, api::MemoryAccessType::READ);
-  return view_->image_;
+    const api::PipelineStageFlags stage) & {
+  storage_.transition(pipeline_barrier, stage, api::MemoryAccessType::READ);
+  return storage_.image_;
 }
 
 api::VulkanImage& vTensor::image(
     api::PipelineBarrier& pipeline_barrier,
     const api::PipelineStageFlags stage,
     const api::MemoryAccessFlags access) & {
-  view_->transition(pipeline_barrier, stage, access);
-  return view_->image_;
+  storage_.transition(pipeline_barrier, stage, access);
+  return storage_.image_;
 }
 
 api::VulkanBuffer& vTensor::buffer(
     api::PipelineBarrier& pipeline_barrier,
-    const api::PipelineStageFlags stage) const& {
-  view_->transition(pipeline_barrier, stage, api::MemoryAccessType::READ);
-  return view_->buffer_;
+    const api::PipelineStageFlags stage) & {
+  storage_.transition(pipeline_barrier, stage, api::MemoryAccessType::READ);
+  return storage_.buffer_;
 }
 
 api::VulkanBuffer& vTensor::buffer(
     api::PipelineBarrier& pipeline_barrier,
     const api::PipelineStageFlags stage,
     const api::MemoryAccessFlags access) & {
-  view_->transition(pipeline_barrier, stage, access);
-  return view_->buffer_;
+  storage_.transition(pipeline_barrier, stage, access);
+  return storage_.buffer_;
 }
 
 std::shared_ptr<api::UniformParamsBuffer> vTensor::cpu_sizes_ubo() {
   if (!cpu_sizes_uniform_) {
     cpu_sizes_uniform_.reset(new api::UniformParamsBuffer(
-        view_->context_, api::utils::make_whcn_ivec4(sizes_)));
+        storage_.context_, api::utils::make_whcn_ivec4(sizes_)));
   }
   return cpu_sizes_uniform_;
 }
 
 std::shared_ptr<api::UniformParamsBuffer> vTensor::gpu_sizes_ubo() {
   if (!gpu_sizes_uniform_) {
     gpu_sizes_uniform_.reset(new api::UniformParamsBuffer(
-        view_->context_, api::utils::make_whcn_ivec4(gpu_sizes_)));
+        storage_.context_, api::utils::make_whcn_ivec4(gpu_sizes_)));
   }
   return gpu_sizes_uniform_;
 }
 
 std::shared_ptr<api::UniformParamsBuffer> vTensor::extents_ubo() {
   if (!extents_uniform_) {
     extents_uniform_.reset(new api::UniformParamsBuffer(
-        view_->context_,
+        storage_.context_,
         api::utils::uvec4(
-            {view_->extents_.data[0],
-             view_->extents_.data[1],
-             view_->extents_.data[2],
+            {storage_.extents_.data[0],
+             storage_.extents_.data[1],
+             storage_.extents_.data[2],
             1u})));
   }
   return extents_uniform_;
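
Note: the three `*_ubo()` getters in this hunk share a lazy-initialization pattern: the uniform buffer is created on first request from `storage_.context_` and cached for subsequent calls. A generic sketch of that pattern; the `UniformBuffer` and `Holder` types are illustrative, not the real `api::UniformParamsBuffer`:

```cpp
#include <memory>

// Stand-in for a GPU uniform buffer holding some payload.
struct UniformBuffer {
  explicit UniformBuffer(int payload) : payload(payload) {}
  int payload;
};

struct Holder {
  std::shared_ptr<UniformBuffer> cached_;

  // Create the buffer on first use, then hand out the cached instance.
  std::shared_ptr<UniformBuffer> get(int payload) {
    if (!cached_) {
      cached_.reset(new UniformBuffer(payload)); // created exactly once
    }
    return cached_;
  }
};

int main() {
  Holder h;
  auto first = h.get(42);
  auto second = h.get(99);            // payload ignored: buffer already exists
  return first == second ? 0 : 1;     // 0: both calls return the same buffer
}
```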
@@ -332,41 +221,41 @@ std::shared_ptr<api::UniformParamsBuffer> vTensor::extents_ubo() {
 VmaAllocationCreateInfo vTensor::get_allocation_create_info() const {
   switch (storage_type()) {
     case api::kBuffer:
-      return view_->buffer_.allocation_create_info();
+      return storage_.buffer_.allocation_create_info();
     case api::kTexture2D:
     case api::kTexture3D:
-      return view_->image_.allocation_create_info();
+      return storage_.image_.allocation_create_info();
   }
   return {};
 }
 
 VkMemoryRequirements vTensor::get_memory_requirements() const {
   switch (storage_type()) {
     case api::kBuffer:
-      return view_->buffer_.get_memory_requirements();
+      return storage_.buffer_.get_memory_requirements();
     case api::kTexture2D:
     case api::kTexture3D:
-      return view_->image_.get_memory_requirements();
+      return storage_.image_.get_memory_requirements();
   }
   return {};
 }
 
 void vTensor::bind_allocation(const api::MemoryAllocation& allocation) {
   switch (storage_type()) {
     case api::kBuffer:
-      view_->buffer_.bind_allocation(allocation);
+      storage_.buffer_.bind_allocation(allocation);
       break;
     case api::kTexture2D:
     case api::kTexture3D:
-      view_->image_.bind_allocation(allocation);
+      storage_.image_.bind_allocation(allocation);
       break;
   }
 }
 
 void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
   sizes_ = new_sizes;
   gpu_sizes_ = calc_gpu_sizes(sizes_, memory_layout_, storage_type());
-  virtual_extents_ =
+  api::utils::uvec3 virtual_extents =
       create_image_extents(gpu_sizes_, storage_type(), memory_layout_);
 
   if (cpu_sizes_uniform_) {
@@ -379,47 +268,23 @@ void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
 
   if (extents_uniform_) {
     extents_uniform_->update(api::utils::uvec4(
-        {virtual_extents_.data[0],
-         virtual_extents_.data[1],
-         virtual_extents_.data[2],
+        {virtual_extents.data[0],
+         virtual_extents.data[1],
+         virtual_extents.data[2],
         1u}));
   }
 }
 
 void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
   update_size_metadata(new_sizes);
-  view_->discard_and_reallocate(
+  storage_.discard_and_reallocate(
       calc_gpu_sizes(new_sizes, memory_layout_, storage_type()),
       memory_layout_,
       dtype_);
 }
 
 void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
   update_size_metadata(new_sizes);
-  if (storage_type() == api::kBuffer) {
-    if (gpu_nbytes() > view_->buffer_.mem_size()) {
-      VK_THROW(
-          "Cannot virtual_resize a vTensor with sizes that require a larger "
-          "buffer! reallocate() should be used instead.");
-    }
-  } else {
-    bool valid_resize = true;
-    if (virtual_extents_.data[0] > view_->extents_.data[0]) {
-      valid_resize = false;
-    }
-    if (virtual_extents_.data[1] > view_->extents_.data[1]) {
-      valid_resize = false;
-    }
-    if (virtual_extents_.data[2] > view_->extents_.data[2]) {
-      valid_resize = false;
-    }
-
-    if (!valid_resize) {
-      VK_THROW(
-          "Cannot virtual_resize a vTensor with sizes that require a larger "
-          "image texture! reallocate() should be used instead.");
-    }
-  }
 }
 
 //
@@ -442,7 +307,7 @@ api::VulkanImage allocate_image(
   };
 
   VkImageType image_type = VK_IMAGE_TYPE_3D;
-  VkImageViewType image_view_type = VK_IMAGE_VIEW_TYPE_3D;
+  VkImageViewType image_view_type;
 
   switch (storage_type) {
     case api::kTexture3D:
@@ -584,39 +449,6 @@ void vTensorStorage::transition(
   last_access_.access = cur_access;
 }
 
-void add_buffer_barrier(
-    api::PipelineBarrier& pipeline_barrier,
-    const api::VulkanBuffer& buffer,
-    const api::PipelineStageFlags prev_stage,
-    const api::MemoryAccessFlags prev_access,
-    const api::PipelineStageFlags cur_stage,
-    const api::MemoryAccessFlags cur_access) {
-  // Check for RAW
-  const bool read_requested = (cur_access & api::MemoryAccessType::READ) != 0;
-  const bool prev_written = (prev_access & api::MemoryAccessType::WRITE) != 0;
-
-  const bool is_RAW = read_requested && prev_written;
-
-  if (is_RAW) {
-    VkPipelineStageFlags src_stage = api::vk_stage(prev_stage);
-    if (0u == src_stage) {
-      src_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
-    }
-    VkPipelineStageFlags dst_stage = api::vk_stage(cur_stage);
-    if (0u == dst_stage) {
-      dst_stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
-    }
-
-    pipeline_barrier.stage.src |= src_stage;
-    pipeline_barrier.stage.dst |= dst_stage;
-
-    pipeline_barrier.buffers.emplace_back(
-        api::vk_access(prev_stage, prev_access),
-        api::vk_access(cur_stage, cur_access),
-        buffer);
-  }
-}
-
 void vTensorStorage::discard_and_reallocate(
     const std::vector<int64_t>& gpu_sizes,
     const api::GPUMemoryLayout gpu_memory_layout,