Skip to content

Commit f5f54b8

Browse files
SS-JIA and facebook-github-bot
authored and committed
vTensor cleanup 4/N - consolidate texture positions and extents to be logical positions and extents (#5442)
Summary: Pull Request resolved: #5442 ## Context Tensors are N-dimensional constructs, whereas image textures are limited to 3 dimensions (with an X, Y, and Z axis). Before axis mapping was introduced, converting between tensor indices and texture coordinates, as well as converting between tensor sizes and texture limits, was easy because of the rigid way texture axes were used to represent tensor dimensions: 1. the X axis of the texture corresponds to the width dim 2. the Y axis of the texture corresponds to the height dim 3. the Z axis of the texture corresponds to the combined batch + channels dim However, with the introduction of axis mapping, the texture representation of a tensor is far more flexible. NOTE: summary is still a WIP. Short on time today so will fill in more context later. This diff consolidates all image extent related member variables of `vTensor` to use logical texture limits instead of physical texture limits. The reason for this is to reduce developer confusion about different representations of texture metadata, and also to enforce better practices in shader programming. Using logical texture positions and logical texture limits is usually preferable because of the following: 1. Each element has a clear relationship to a tensor dimension, thus it is easier to understand semantically 2. It's more efficient to translate between logical texture position and tensor indices; in shaders, we often have to convert texture positions to tensor indices to coordinate data loads/writes among the different input tensors and output tensor ## Notes for Reviewers There are a large number of changes in this diff, but the majority of meaningful changes are in `Tensor.h` and `Tensor.cpp`. The rest is just replacing instances of `image_extents()` and `texture_limits()` with `logical_limits()`. ghstack-source-id: 243309911 exported-using-ghexport Reviewed By: jorgep31415 Differential Revision: D62901893 fbshipit-source-id: bfb63151c85255967ed9e26afc82eaa70b1b365b
1 parent 958afe1 commit f5f54b8

24 files changed

+85
-143
lines changed

backends/vulkan/runtime/api/containers/Tensor.cpp

Lines changed: 14 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -418,14 +418,12 @@ vTensor::vTensor(
418418
padded_sizes_{calculate_padded_sizes(sizes, memory_layout_)},
419419
unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
420420
padded_numel_(utils::multiply_integers(padded_sizes_)),
421-
texture_limits_{{0, 0, 0}},
422421
logical_limits_{{0, 0, 0}},
423422
// Utility Uniform Buffers that can be passed to shaders as arguments
424423
sizes_uniform_(),
425424
strides_uniform_(),
426425
numel_uniform_(),
427426
axis_map_uniform_(),
428-
texture_limits_uniform_(),
429427
logical_limits_uniform_(),
430428
// Construct Tensor storage
431429
storage_(
@@ -440,12 +438,7 @@ vTensor::vTensor(
440438
dim_order_is_valid(dim_order_), "computed dim order is invalid");
441439

442440
if (storage_type != utils::kBuffer) {
443-
texture_limits_.limits = utils::ivec3{
444-
utils::safe_downcast<int32_t>(storage_.image_extents_[0]),
445-
utils::safe_downcast<int32_t>(storage_.image_extents_[1]),
446-
utils::safe_downcast<int32_t>(storage_.image_extents_[2])};
447-
448-
update_logical_limits();
441+
set_logical_limits(storage_.image_extents_);
449442
}
450443

451444
if (dtype == vkapi::kHalf) {
@@ -470,14 +463,12 @@ vTensor::vTensor(const vTensor& other)
470463
other.unsqueezed_strides_.begin(),
471464
other.unsqueezed_strides_.end()},
472465
padded_numel_(other.padded_numel_),
473-
texture_limits_{other.texture_limits_},
474466
logical_limits_{other.logical_limits_},
475467
// Empty initialize Utility Uniform Buffers
476468
sizes_uniform_(),
477469
strides_uniform_(),
478470
numel_uniform_(),
479471
axis_map_uniform_(),
480-
texture_limits_uniform_(),
481472
logical_limits_uniform_(),
482473
// Copy Tensor storage
483474
storage_(other.storage_) {}
@@ -498,14 +489,12 @@ vTensor::vTensor(
498489
padded_sizes_{calculate_padded_sizes(sizes, memory_layout_)},
499490
unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
500491
padded_numel_(utils::multiply_integers(padded_sizes_)),
501-
texture_limits_{other.texture_limits_},
502492
logical_limits_(other.logical_limits_),
503493
// Empty initialize Utility Uniform Buffers
504494
sizes_uniform_(),
505495
strides_uniform_(),
506496
numel_uniform_(),
507497
axis_map_uniform_(),
508-
texture_limits_uniform_(),
509498
logical_limits_uniform_(),
510499
// Copy Tensor storage
511500
storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) {
@@ -547,18 +536,10 @@ vkapi::VulkanBuffer& vTensor::buffer(
547536
return storage_.buffer_;
548537
}
549538

550-
void vTensor::update_logical_limits() {
551-
logical_limits_.limits[0] = texture_limits_.limits[axis_map_.at(0)];
552-
logical_limits_.limits[1] = texture_limits_.limits[axis_map_.at(1)];
553-
logical_limits_.limits[2] = texture_limits_.limits[axis_map_.at(2)];
554-
}
555-
556-
utils::uvec3 vTensor::logical_extents() const {
557-
utils::uvec3 logical_extents(
558-
{utils::safe_downcast<uint32_t>(logical_limits_.limits[0]),
559-
utils::safe_downcast<uint32_t>(logical_limits_.limits[1]),
560-
utils::safe_downcast<uint32_t>(logical_limits_.limits[2])});
561-
return logical_extents;
539+
void vTensor::set_logical_limits(const utils::uvec3& image_extents) {
540+
logical_limits_.limits[0] = image_extents[axis_map_.at(0)];
541+
logical_limits_.limits[1] = image_extents[axis_map_.at(1)];
542+
logical_limits_.limits[2] = image_extents[axis_map_.at(2)];
562543
}
563544

564545
const vkapi::BufferBindInfo vTensor::sizes_ubo() {
@@ -585,13 +566,6 @@ const vkapi::BufferBindInfo vTensor::axis_map_ubo() {
585566
return vkapi::BufferBindInfo(axis_map_uniform_.buffer());
586567
}
587568

588-
const vkapi::BufferBindInfo vTensor::texture_limits_ubo() {
589-
if (!texture_limits_uniform_.buffer()) {
590-
texture_limits_uniform_ = ParamsBuffer(storage_.context_, texture_limits_);
591-
}
592-
return vkapi::BufferBindInfo(texture_limits_uniform_.buffer());
593-
}
594-
595569
const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
596570
if (!logical_limits_uniform_.buffer()) {
597571
logical_limits_uniform_ = ParamsBuffer(storage_.context_, logical_limits_);
@@ -655,18 +629,10 @@ void vTensor::update_metadata() {
655629
unsqueezed_strides_ = unsqueeze_strides(strides_, numel_);
656630
padded_numel_ = utils::multiply_integers(padded_sizes_);
657631

658-
// Calculate the extents of the image texture that would have been required
659-
// for a tensor of the new sizes.
660-
utils::uvec3 virtual_extents =
661-
calculate_image_extents(padded_sizes_, axis_map_, memory_layout_);
662-
663-
// Update the texture limits to reflect the new virtual extents.
664-
texture_limits_.limits = utils::ivec3{
665-
utils::safe_downcast<int32_t>(virtual_extents[0]),
666-
utils::safe_downcast<int32_t>(virtual_extents[1]),
667-
utils::safe_downcast<int32_t>(virtual_extents[2])};
668-
669-
update_logical_limits();
632+
// Calculate the image extents that would have been used to allocate a texture
633+
// with the current sizes, and use that to set the logical limits.
634+
set_logical_limits(
635+
calculate_image_extents(padded_sizes_, axis_map_, memory_layout_));
670636

671637
if (sizes_uniform_.buffer()) {
672638
sizes_uniform_.update(utils::make_whcn_ivec4(sizes_));
@@ -680,9 +646,6 @@ void vTensor::update_metadata() {
680646
if (axis_map_uniform_.buffer()) {
681647
axis_map_uniform_.update(utils::make_ivec4(axis_map_));
682648
}
683-
if (texture_limits_uniform_.buffer()) {
684-
texture_limits_uniform_.update(texture_limits_);
685-
}
686649
if (logical_limits_uniform_.buffer()) {
687650
logical_limits_uniform_.update(logical_limits_);
688651
}
@@ -695,9 +658,11 @@ void vTensor::check_sizes(const std::vector<int64_t>& sizes) const {
695658
utils::uvec3 virtual_extents =
696659
calculate_image_extents(padded_sizes_, axis_map_, memory_layout_);
697660

698-
bool valid_resize = virtual_extents[0] <= image_extents()[0];
699-
valid_resize = valid_resize && virtual_extents[1] <= image_extents()[1];
700-
valid_resize = valid_resize && virtual_extents[2] <= image_extents()[2];
661+
bool valid_resize = virtual_extents[0] <= storage_.image_extents_[0];
662+
valid_resize =
663+
valid_resize && virtual_extents[1] <= storage_.image_extents_[1];
664+
valid_resize =
665+
valid_resize && virtual_extents[2] <= storage_.image_extents_[2];
701666

702667
VK_CHECK_COND(
703668
valid_resize,

backends/vulkan/runtime/api/containers/Tensor.h

Lines changed: 22 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -276,9 +276,7 @@ class vTensor final {
276276
// Contains the number of elements in the tensor according to the padded
277277
// sizes.
278278
size_t padded_numel_;
279-
// See the comments documenting image_extents() for more context.
280-
TextureLimits texture_limits_;
281-
// See the comments documenting logical_extents() for more context.
279+
// See the comments documenting logical_limits() for more context.
282280
TextureLimits logical_limits_;
283281

284282
/*
@@ -294,7 +292,6 @@ class vTensor final {
294292
ParamsBuffer strides_uniform_;
295293
ParamsBuffer numel_uniform_;
296294
ParamsBuffer axis_map_uniform_;
297-
ParamsBuffer texture_limits_uniform_;
298295
ParamsBuffer logical_limits_uniform_;
299296

300297
vTensorStorage storage_;
@@ -342,28 +339,30 @@ class vTensor final {
342339
return storage_.storage_type_ == utils::kBuffer;
343340
}
344341

345-
/*
346-
* Returns the raw image extents of the underlying image texture used to store
347-
* the tensor's data. Note that due to axis mapping, the X, Y, and Z extents
348-
* may not correspond to the width, height, or channels dimension of the
349-
* tensor.
350-
*/
351-
inline const utils::uvec3& image_extents() const {
352-
return storage_.image_extents_;
353-
}
354-
355342
private:
356-
void update_logical_limits();
343+
void set_logical_limits(const utils::uvec3& image_extents);
357344

358345
public:
359346
/*
360-
* Returns the image extents of the underlying image texture, but re-ordered
361-
* such that the first element is the extent of the axis used to represent the
362-
* tensor's width dimension, the second element is the extent of the axis used
363-
* to represent the tensor's height dimension, and the third element is the
364-
* extent of the axis used to represent the tensor's channels dimension.
347+
* The logical limits of the tensor are derived from the image extents of the
348+
* image texture used to store the tensor, but with two key differences.
349+
*
350+
* First, the image extents are permuted according to the axis map. This
351+
* makes it so that the first element of the logical limit is the limit of the
352+
* texture axis corresponding to the width dimension of the tensor, the next
353+
* element is the limit of the texture axis corresponding to the height
354+
* dimension and the last element is the limit of the texture axis that
355+
* corresponds to the channels dimension of the tensor.
356+
*
357+
* Second, the logical limits may use smaller extents than the actual image
358+
* extents of the image texture. This is due to dynamic shape; if the tensor's
359+
* `virtual_resize()` function is called, then the logical limits will reflect
360+
* the extents that would be needed to support a tensor with the updated sizes
361+
* instead of the original sizes.
365362
*/
366-
utils::uvec3 logical_extents() const;
363+
inline const utils::ivec3& logical_limits() const {
364+
return logical_limits_.limits;
365+
}
367366

368367
/*
369368
* Extract an `vkapi::ScalarType` from the TensorOptions member
@@ -430,18 +429,8 @@ class vTensor final {
430429
const vkapi::BufferBindInfo axis_map_ubo();
431430

432431
/*
433-
* Returns a GPU buffer containing the virtual image extents of the tensor.
434-
* Since a tensor can be resized with the virtual_resize() function, this
435-
* GPU buffer contains the image extents of the tensor calculated using the
436-
* virtual_resize() function. This allows shaders to exit early if they are
437-
* working outside the limits of the texture.
438-
*/
439-
const vkapi::BufferBindInfo texture_limits_ubo();
440-
441-
/*
442-
* Returns a GPU buffer containing the logical image extents of the tensor.
443-
* It contains the same data as texture_limits_ubo(), but with the data
444-
* re-ordered. See the comments for logical_extents() for more context.
432+
* Returns a GPU buffer containing the logical limits of the tensor. See the
433+
* comments for logical_limits() for more context.
445434
*/
446435
const vkapi::BufferBindInfo logical_limits_ubo();
447436

@@ -450,10 +439,6 @@ class vTensor final {
450439
*/
451440
const vkapi::BufferBindInfo numel_ubo();
452441

453-
inline const utils::ivec3 texture_limits() const {
454-
return texture_limits_.limits;
455-
}
456-
457442
inline size_t numel() const {
458443
return numel_;
459444
}

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ utils::uvec3 ComputeGraph::create_global_wg_size(const ValueRef idx) {
428428
if (is_buffer_storage(idx)) {
429429
return {uint32_t(numel_of(idx)), 1u, 1u};
430430
}
431-
return image_extents_of(idx);
431+
return logical_limits_of(idx);
432432
}
433433

434434
utils::uvec3 ComputeGraph::create_local_wg_size(

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -284,12 +284,8 @@ class ComputeGraph final {
284284

285285
vkapi::ScalarType dtype_of(const ValueRef idx) const;
286286

287-
inline utils::uvec3 image_extents_of(const ValueRef idx) const {
288-
return values_.at(idx).toConstTensor().image_extents();
289-
}
290-
291-
inline utils::uvec3 logical_extents_of(const ValueRef idx) const {
292-
return values_.at(idx).toConstTensor().logical_extents();
287+
inline const utils::ivec3& logical_limits_of(const ValueRef idx) const {
288+
return values_.at(idx).toConstTensor().logical_limits();
293289
}
294290

295291
inline int32_t numel_of(const ValueRef idx) const {
@@ -335,10 +331,6 @@ class ComputeGraph final {
335331
return values_.at(idx).toTensor().axis_map_ubo();
336332
}
337333

338-
inline vkapi::BufferBindInfo texture_limits_ubo(const ValueRef idx) {
339-
return values_.at(idx).toTensor().texture_limits_ubo();
340-
}
341-
342334
inline vkapi::BufferBindInfo logical_limits_ubo(const ValueRef idx) {
343335
return values_.at(idx).toTensor().logical_limits_ubo();
344336
}

backends/vulkan/runtime/graph/ops/impl/BatchNorm.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ void add_native_batch_norm_node(
8888
{{out_ref, vkapi::MemoryAccessType::WRITE},
8989
{{in_ref, arg_weight, arg_bias, arg_mean, arg_var},
9090
vkapi::MemoryAccessType::READ}},
91-
{t_out->texture_limits_ubo(),
91+
{t_out->logical_limits_ubo(),
9292
graph.create_params_buffer(epsilon),
9393
graph.create_params_buffer(num_texel_per_batch)}));
9494
}

backends/vulkan/runtime/graph/ops/impl/Cat.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ void add_cat_default_node(
4040

4141
for (ValueRef input_ref : *input_list) {
4242
vTensorPtr t_in = graph.get_tensor(input_ref);
43-
utils::ivec3 range = t_in->texture_limits();
43+
utils::ivec3 range = t_in->logical_limits();
4444
add_copy_offset_node(
4545
graph, input_ref, range, src_offset, dst_offset, out);
4646
dst_offset[0] += range[0];
@@ -52,7 +52,7 @@ void add_cat_default_node(
5252

5353
for (ValueRef input_ref : *input_list) {
5454
vTensorPtr t_in = graph.get_tensor(input_ref);
55-
utils::ivec3 range = t_in->texture_limits();
55+
utils::ivec3 range = t_in->logical_limits();
5656
add_copy_offset_node(
5757
graph, input_ref, range, src_offset, dst_offset, out);
5858
dst_offset[1] += range[1];
@@ -63,7 +63,7 @@ void add_cat_default_node(
6363

6464
for (ValueRef input_ref : *input_list) {
6565
vTensorPtr t_in = graph.get_tensor(input_ref);
66-
utils::ivec3 range = t_in->texture_limits();
66+
utils::ivec3 range = t_in->logical_limits();
6767
add_copy_offset_node(
6868
graph, input_ref, range, src_offset, dst_offset, out);
6969
dst_offset[2] += range[2];

backends/vulkan/runtime/graph/ops/impl/Clone.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ void add_clone_node(
3232
graph.create_local_wg_size(out),
3333
{{out, vkapi::MemoryAccessType::WRITE},
3434
{in, vkapi::MemoryAccessType::READ}},
35-
{t_out->texture_limits_ubo()}));
35+
{t_out->logical_limits_ubo()}));
3636
}
3737

3838
void clone(ComputeGraph& graph, const std::vector<ValueRef>& args) {

backends/vulkan/runtime/graph/ops/impl/Convolution.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ utils::uvec3 create_conv2d_global_wg_size(
291291
const Conv2dMethod method,
292292
const ValueRef out) {
293293
if (method == Conv2dMethod::Pointwise) {
294-
const utils::uvec3 image_extents = graph.image_extents_of(out);
294+
const utils::uvec3 image_extents = graph.logical_limits_of(out);
295295
return {
296296
utils::div_up(image_extents[0u], 2u),
297297
utils::div_up(image_extents[1u], 2u),
@@ -376,7 +376,7 @@ void add_conv2d_node(
376376
{{arg_in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
377377
// Shader params buffers
378378
{
379-
t_out->texture_limits_ubo(),
379+
t_out->logical_limits_ubo(),
380380
t_in->sizes_ubo(),
381381
graph.create_params_buffer(kernel_params),
382382
graph.create_params_buffer(extra_params),
@@ -474,7 +474,7 @@ void add_conv1d_node(
474474
{{arg_in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
475475
// Shader params buffers
476476
{
477-
t_out->texture_limits_ubo(),
477+
t_out->logical_limits_ubo(),
478478
t_in->sizes_ubo(),
479479
graph.create_params_buffer(kernel_params),
480480
graph.create_params_buffer(out_params),

backends/vulkan/runtime/graph/ops/impl/Linear.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ void add_addmm_naive_node(
103103
add_storage_type_suffix(kernel_name, graph.storage_type_of(out));
104104
add_dtype_suffix(kernel_name, graph.dtype_of(out));
105105

106-
utils::uvec3 global_wg_size = graph.logical_extents_of(out);
106+
utils::uvec3 global_wg_size = graph.logical_limits_of(out);
107107
graph.execute_nodes().emplace_back(new ExecuteNode(
108108
graph,
109109
VK_KERNEL_FROM_STR(kernel_name),
@@ -182,7 +182,7 @@ void add_addmm_optimized_node(
182182

183183
add_dtype_suffix(kernel_name, graph.dtype_of(out));
184184

185-
utils::uvec3 global_size;
185+
utils::uvec3 global_size = graph.logical_limits_of(out);
186186

187187
// Each thread computes a W=(2/4) x H=4 x C=(1/4) output tile. Therefore, the
188188
// total number of threads is W/(2 or 4) x H/4 x C/1. Since the out tensor is
@@ -193,9 +193,9 @@ void add_addmm_optimized_node(
193193
if (mat1_sizes.at(mat1_dims - 2) < 8) {
194194
// Use `logical_limits` instead of the raw image extents because the workgroup
195195
// axes need to correspond to tensor dimensions.
196-
global_size = utils::divup_vec(graph.logical_extents_of(out), {4, 2, 1});
196+
global_size = utils::divup_vec(global_size, {4, 2, 1});
197197
} else {
198-
global_size = utils::divup_vec(graph.logical_extents_of(out), {4, 4, 1});
198+
global_size = utils::divup_vec(global_size, {4, 4, 1});
199199
}
200200
utils::uvec3 local_size = adaptive_work_group_size(global_size);
201201

0 commit comments

Comments
 (0)