 namespace vkcompute {
 namespace api {

-/*
- * Given the strides of a buffer-backed tensor, find the index of the "fastest
- * moving" dimension in WHCN dimension order. If multiple dims have the lowest
- * stride, then the "earlier" dim is assumed to be the fastest moving (width is
- * "earlier" than height).
- */
-int32_t find_fastest_whcn_dim(const std::vector<int64_t>& strides) {
-  if (strides.size() == 0) {
-    return 0;
-  }
-  int32_t fastest_dim = 0;
-  int64_t min_stride = strides.at(0);
-  for (int d = strides.size() - 1; d >= 0; --d) {
-    if (strides.at(d) < min_stride) {
-      fastest_dim = d;
-      min_stride = strides.at(d);
-    }
-  }
-  return (strides.size() - 1 - fastest_dim);
-}
-
 /*
  * Given the strides of a buffer-backed tensor, estimate the equivalent memory
  * layout enum value by identifying the fastest moving dimension.
  */
 utils::GPUMemoryLayout estimate_memory_layout(
-    const std::vector<int64_t>& strides) {
-  int32_t fastest_dim = find_fastest_whcn_dim(strides);
-  if (fastest_dim <= 3) {
-    return utils::GPUMemoryLayout(fastest_dim);
+    const std::vector<int64_t>& dim_order) {
+  int64_t fastest_dim_whcn = dim_order.size() - 1 - dim_order.back();
+  if (fastest_dim_whcn >= 0 && fastest_dim_whcn < 3) {
+    return utils::GPUMemoryLayout(fastest_dim_whcn);
   }

   // TODO(ssjia) find a way to gracefully recover from this case by i.e. adding
@@ -51,41 +30,70 @@ utils::GPUMemoryLayout estimate_memory_layout(
   VK_THROW("No compatible GPUMemoryLayout value");
 }

+std::vector<int64_t> calculate_dim_order(
+    const size_t ndim,
+    const utils::GPUMemoryLayout memory_layout) {
+  // Special case for zero dim tensors
+  if (ndim == 0) {
+    return {0};
+  }
+  std::vector<int64_t> dim_order(ndim);
+  int64_t last_dim =
+      ndim - utils::to_packed_dim_nchw_offset<int64_t>(memory_layout);
+
+  int64_t cur_dim = 0;
+  for (int d = 0; d < ndim; ++d) {
+    if (d == last_dim) {
+      cur_dim++;
+    }
+    dim_order[d] = cur_dim;
+    cur_dim++;
+  }
+  if (last_dim >= 0) {
+    dim_order[ndim - 1] = last_dim;
+  }
+
+  return dim_order;
+}
+
 std::vector<int64_t> calculate_strides(
     const std::vector<int64_t>& sizes,
-    const utils::GPUMemoryLayout memory_layout) {
+    const std::vector<int64_t>& dim_order) {
   // For zero dim tensors
   if (sizes.size() == 0) {
     return {1};
   }

-  const int64_t dim_offset =
-      utils::to_packed_dim_nchw_offset<int64_t>(memory_layout);
-  int64_t last_dim = sizes.size() - dim_offset;
-  if (last_dim < 0) {
-    last_dim = sizes.size() - 1;
-  }
-
   size_t ndim = sizes.size();
   std::vector<int64_t> strides(ndim);

-  const int64_t last_dim_size = sizes.at(last_dim);
-
-  for (int stride_d = ndim - 1; stride_d >= 0; stride_d--) {
-    strides.at(stride_d) = 1;
-    if (stride_d == last_dim) {
-      continue;
-    }
-    strides.at(stride_d) = last_dim_size;
-    for (int size_d = ndim - 1; size_d > stride_d; size_d--) {
-      if (size_d != last_dim) {
-        strides.at(stride_d) *= sizes.at(size_d);
-      }
+  strides[dim_order[ndim - 1]] = 1;
+  for (int32_t i = ndim - 2; i >= 0; --i) {
+    if (sizes[dim_order[i + 1]] == 0) {
+      strides[dim_order[i]] = strides[dim_order[i + 1]];
+    } else {
+      strides[dim_order[i]] =
+          strides[dim_order[i + 1]] * sizes[dim_order[i + 1]];
     }
   }
+
   return strides;
 }

+bool dim_order_is_valid(const std::vector<int64_t>& dim_order) {
+  int64_t sum = 0;
+  for (size_t i = 0; i < dim_order.size(); ++i) {
+    if (dim_order[i] < 0 || dim_order[i] >= dim_order.size()) {
+      return false;
+    }
+    sum += dim_order[i];
+  }
+  int64_t n = static_cast<int64_t>(dim_order.size() - 1);
+  // Sanity check that the sum of the indices in the vector is equal to the sum
+  // of 0 + 1 + 2 + ... + (ndim - 1)
+  return sum == n * (n + 1) / 2;
+}
+
 std::vector<int64_t> unsqueeze_strides(
     const std::vector<int64_t>& strides,
     const int64_t numel) {
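As a worked example of how the new helpers fit together, the following standalone sketch re-implements simplified versions of calculate_dim_order and calculate_strides (for the nonzero-dim case) and applies the estimate_memory_layout index math. The Layout enum and the packed-dim offsets (width/height/channels mapping to NCHW offsets 1/2/3) are assumptions standing in for utils::GPUMemoryLayout and utils::to_packed_dim_nchw_offset; this is an illustration, not the library API.

#include <cstdint>
#include <iostream>
#include <vector>

// Stand-ins for utils::GPUMemoryLayout and utils::to_packed_dim_nchw_offset
// (assumption: width/height/channels packed map to NCHW offsets 1/2/3).
enum class Layout : int64_t { WidthPacked = 0, HeightPacked = 1, ChannelsPacked = 2 };

int64_t packed_dim_nchw_offset(Layout layout) {
  return static_cast<int64_t>(layout) + 1;
}

// Mirrors calculate_dim_order: the packed dim is moved to the end of the dim
// order, making it the fastest moving dimension.
std::vector<int64_t> calc_dim_order(size_t ndim, Layout layout) {
  if (ndim == 0) {
    return {0};
  }
  std::vector<int64_t> dim_order(ndim);
  int64_t last_dim =
      static_cast<int64_t>(ndim) - packed_dim_nchw_offset(layout);
  int64_t cur_dim = 0;
  for (size_t d = 0; d < ndim; ++d) {
    if (static_cast<int64_t>(d) == last_dim) {
      cur_dim++;
    }
    dim_order[d] = cur_dim++;
  }
  if (last_dim >= 0) {
    dim_order[ndim - 1] = last_dim;
  }
  return dim_order;
}

// Mirrors calculate_strides for ndim > 0: walk the dim order from fastest to
// slowest, accumulating size products (size-0 dims contribute a factor of 1).
std::vector<int64_t> calc_strides(
    const std::vector<int64_t>& sizes,
    const std::vector<int64_t>& dim_order) {
  const size_t ndim = sizes.size();
  std::vector<int64_t> strides(ndim);
  strides[dim_order[ndim - 1]] = 1;
  for (int64_t i = static_cast<int64_t>(ndim) - 2; i >= 0; --i) {
    const int64_t next_size = sizes[dim_order[i + 1]];
    strides[dim_order[i]] =
        strides[dim_order[i + 1]] * (next_size == 0 ? 1 : next_size);
  }
  return strides;
}

int main() {
  // NCHW sizes {2, 3, 4, 5} with a channels-packed layout.
  const std::vector<int64_t> sizes = {2, 3, 4, 5};
  const std::vector<int64_t> dim_order =
      calc_dim_order(sizes.size(), Layout::ChannelsPacked);
  const std::vector<int64_t> strides = calc_strides(sizes, dim_order);

  // dim_order = {0, 2, 3, 1}: channels (NCHW dim 1) moves fastest.
  for (int64_t d : dim_order) {
    std::cout << d << " ";
  }
  std::cout << "\n";
  // strides = {60, 1, 15, 3}: stride(C) = 1, stride(W) = C, stride(H) = C*W.
  for (int64_t s : strides) {
    std::cout << s << " ";
  }
  std::cout << "\n";
  // estimate_memory_layout recovers the layout from the dim order alone:
  // ndim - 1 - dim_order.back() = 4 - 1 - 1 = 2, the channels dim in WHCN
  // order, which maps back to the channels-packed layout.
  std::cout << static_cast<int64_t>(dim_order.size()) - 1 - dim_order.back()
            << "\n";
  return 0;
}

For a channels-packed {2, 3, 4, 5} tensor this prints dim_order {0, 2, 3, 1}, strides {60, 1, 15, 3}, and fastest WHCN dim 2, so the estimated layout matches the layout the dim order was derived from.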
@@ -170,7 +178,8 @@ vTensor::vTensor(
       memory_layout_(memory_layout),
       // Calculate tensor size metadata
       sizes_(sizes.begin(), sizes.end()),
-      strides_(calculate_strides(sizes, memory_layout_)),
+      dim_order_(calculate_dim_order(sizes_.size(), memory_layout_)),
+      strides_(calculate_strides(sizes, dim_order_)),
       numel_(utils::multiply_integers(sizes_)),
       padded_sizes_{calculate_padded_sizes(sizes, memory_layout_)},
       unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
@@ -189,6 +198,9 @@ vTensor::vTensor(
           padded_sizes_,
           dtype_,
           allocate_memory) {
+  VK_CHECK_COND(
+      dim_order_is_valid(dim_order_), "computed dim order is invalid");
+
   if (storage_type != utils::kBuffer) {
     texture_limits_.limits = utils::ivec3{
         utils::safe_downcast<int32_t>(storage_.image_extents_[0]),
@@ -204,16 +216,39 @@ vTensor::vTensor(
   }
 }

+vTensor::vTensor(const vTensor& other)
+    : dtype_(other.dtype_),
+      memory_layout_(other.memory_layout_),
+      // Copy tensor size metadata
+      sizes_(other.sizes_.begin(), other.sizes_.end()),
+      dim_order_(other.dim_order_.begin(), other.dim_order_.end()),
+      strides_(other.strides_.begin(), other.strides_.end()),
+      numel_(other.numel_),
+      padded_sizes_{other.padded_sizes_.begin(), other.padded_sizes_.end()},
+      unsqueezed_strides_{
+          other.unsqueezed_strides_.begin(),
+          other.unsqueezed_strides_.end()},
+      padded_numel_(other.padded_numel_),
+      texture_limits_{other.texture_limits_},
+      // Empty initialize Utility Uniform Buffers
+      sizes_uniform_(),
+      strides_uniform_(),
+      numel_uniform_(),
+      texture_limits_uniform_(),
+      // Copy Tensor storage
+      storage_(other.storage_) {}
+
 vTensor::vTensor(
     const vTensor& other,
     const std::vector<int64_t>& sizes,
-    const std::vector<int64_t>& strides,
-    const size_t offset_numel)
+    const std::vector<int64_t>& dim_order,
+    const int64_t offset_numel)
     : dtype_(other.dtype_),
-      memory_layout_(estimate_memory_layout(strides)),
+      memory_layout_(estimate_memory_layout(dim_order)),
       // Copy tensor size metadata
       sizes_(sizes.begin(), sizes.end()),
-      strides_(strides.begin(), strides.end()),
+      dim_order_(dim_order.begin(), dim_order.end()),
+      strides_(calculate_strides(sizes_, dim_order_)),
       numel_(utils::multiply_integers(sizes_)),
       padded_sizes_{calculate_padded_sizes(sizes, memory_layout_)},
       unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
@@ -226,6 +261,8 @@ vTensor::vTensor(
       texture_limits_uniform_(),
       // Copy Tensor storage
      storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) {
+  VK_CHECK_COND(
+      dim_order_is_valid(dim_order_), "new dim order provided is invalid");
   VK_CHECK_COND(
       offset_numel + numel_ <= other.numel(),
       "Tensor alias cannot access more elements than available in the original"
@@ -339,9 +376,17 @@ void vTensor::bind_allocation(const vkapi::Allocation& allocation) {
   }
 }

-void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
+void vTensor::update_metadata(
+    const std::vector<int64_t>& new_sizes,
+    const std::vector<int64_t>& new_dim_order) {
   sizes_ = new_sizes;
-  strides_ = calculate_strides(new_sizes, memory_layout_);
+  dim_order_ = new_dim_order;
+  strides_ = calculate_strides(sizes_, dim_order_);
+  // Only update the memory layout for buffer-backed tensors. Strides are
+  // meaningless for texture-backed tensors and do not impact the memory layout.
+  if (storage_type() == utils::kBuffer) {
+    memory_layout_ = estimate_memory_layout(dim_order_);
+  }
   numel_ = utils::multiply_integers(sizes_);

   padded_sizes_ = calculate_padded_sizes(sizes_, memory_layout_);
@@ -373,15 +418,7 @@ void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
   }
 }

-void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
-  update_size_metadata(new_sizes);
-  storage_.discard_and_reallocate(
-      calculate_padded_sizes(new_sizes, memory_layout_),
-      memory_layout_,
-      dtype_);
-}
-
-void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
+void vTensor::check_sizes(const std::vector<int64_t>& sizes) const {
   if (storage_type() != utils::kBuffer) {
     // For texture storage check that the current texture is large enough for
     // the new sizes of the tensor.
@@ -394,10 +431,47 @@ void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {

     VK_CHECK_COND(
         valid_resize,
-        "Cannot use virtual resize if new sizes requires a larger texture.");
+        "tensor sizes requires a larger texture than the current one.");
+  } else {
+    // For buffer storage check that the current buffer is large enough for the
+    // new sizes of the tensor.
+    int64_t numel = utils::multiply_integers(sizes);
+    bool valid_resize =
+        numel + storage_.buffer_offset_ <= storage_.buffer_length_;
+    VK_CHECK_COND(
+        valid_resize,
+        "tensor sizes requires a larger buffer than the current one.");
   }
+}
+
+void vTensor::virtual_reconfigure(
+    const std::vector<int64_t>& new_sizes,
+    const std::vector<int64_t>& new_dim_order) {
+  VK_CHECK_COND(
+      storage_type() == utils::kBuffer,
+      "virtual_reconfigure is only applicable for buffer backed tensors");
+  VK_CHECK_COND(new_sizes.size() == new_dim_order.size());
+  VK_CHECK_COND(dim_order_is_valid(new_dim_order));

-  update_size_metadata(new_sizes);
+  check_sizes(new_sizes);
+  update_metadata(new_sizes, new_dim_order);
+}
+
+void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
+  VK_CHECK_COND(
+      new_sizes.size() == dim_order_.size(),
+      "new sizes cannot modify the dimensionality of the tensor");
+
+  check_sizes(new_sizes);
+  update_metadata(new_sizes, dim_order_);
+}
+
+void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
+  update_metadata(new_sizes, dim_order_);
+  storage_.discard_and_reallocate(
+      calculate_padded_sizes(new_sizes, memory_layout_),
+      memory_layout_,
+      dtype_);
 }

 //
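The new virtual_reconfigure and virtual_resize entry points are gated by two cheap checks: the dim order must be a valid permutation, and the requested element count must fit in the storage that was allocated up front. The short sketch below exercises both checks in isolation; dim_order_is_valid matches the function from the diff (with an explicit cast to keep the comparison signed), while fits_in_buffer is a hypothetical helper standing in for the buffer branch of check_sizes.

#include <cassert>
#include <cstdint>
#include <vector>

// Matches dim_order_is_valid from the diff: every entry must be a valid dim
// index and the entries must sum to 0 + 1 + ... + (ndim - 1).
bool dim_order_is_valid(const std::vector<int64_t>& dim_order) {
  int64_t sum = 0;
  for (size_t i = 0; i < dim_order.size(); ++i) {
    if (dim_order[i] < 0 ||
        dim_order[i] >= static_cast<int64_t>(dim_order.size())) {
      return false;
    }
    sum += dim_order[i];
  }
  const int64_t n = static_cast<int64_t>(dim_order.size()) - 1;
  return sum == n * (n + 1) / 2;
}

// Hypothetical helper mirroring the buffer branch of check_sizes: the new
// element count, offset by where this view starts in the shared buffer, must
// fit within the buffer length that was allocated originally.
bool fits_in_buffer(int64_t numel, int64_t buffer_offset, int64_t buffer_length) {
  return numel + buffer_offset <= buffer_length;
}

int main() {
  const std::vector<int64_t> channels_packed_order = {0, 2, 3, 1};
  const std::vector<int64_t> out_of_range_order = {0, 2, 4, 1};
  assert(dim_order_is_valid(channels_packed_order));
  assert(!dim_order_is_valid(out_of_range_order));
  // Note: the sum test is a sanity check, not a full permutation check;
  // e.g. {0, 3, 3, 0} also sums to 6 and would pass.

  // A 2x3x4x5 view (120 elements) fits in a 120-element buffer at offset 0,
  // but not when the view starts 16 elements into the same buffer.
  assert(fits_in_buffer(120, 0, 120));
  assert(!fits_in_buffer(120, 16, 120));
  return 0;
}

In the diff, both kinds of checks run before update_metadata, so a rejected reconfigure or resize throws before any of the tensor's metadata is modified.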
@@ -480,6 +554,7 @@ vTensorStorage::vTensorStorage(
       storage_type_{storage_type},
       image_extents_(calculate_image_extents(padded_sizes, gpu_memory_layout)),
       buffer_length_{utils::multiply_integers(padded_sizes)},
+      buffer_offset_{0},
       image_(allocate_image(
           context_,
           image_extents_,
@@ -496,11 +571,12 @@ vTensorStorage::vTensorStorage(

 vTensorStorage::vTensorStorage(
     const vTensorStorage& other,
-    const size_t buffer_offset)
+    const int64_t buffer_offset)
     : context_(other.context_),
       storage_type_{other.storage_type_},
       image_extents_(other.image_extents_),
       buffer_length_{other.buffer_length_},
+      buffer_offset_{buffer_offset},
       image_(),
       buffer_(other.buffer_, buffer_offset),
       last_access_{other.last_access_} {