Commit 11e858f

[ET-VK] Add a UniformData struct to the vTensor class to store uniform data; it is held via a std::shared_ptr so it can be shared with push constants.
Differential Revision: D66733611
Pull Request resolved: #7222
1 parent df988d0 commit 11e858f
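
In short: each vTensor now owns a std::shared_ptr<UniformData> holding the metadata that shaders consume (sizes, strides, logical limits, numel), so a push constant can hold the same pointer and serialize attributes on demand rather than copying fields out of the tensor. A minimal caller-side sketch, assuming an already-constructed api::vTensor named t; the staging block and its size are illustrative, and only get_uniform_data(), write_attribute(), and the kTensor* constants come from this diff:

    // Serialize selected attributes into a CPU-side push constant block.
    const auto& data = t.get_uniform_data(); // shared with the tensor, not a snapshot

    uint8_t push_block[64]; // hypothetical staging block for vkCmdPushConstants
    uint32_t offset = 0;
    // write_attribute() bounds-checks, memcpys the member, and returns the byte
    // count written, so attributes pack back to back.
    offset += data->write_attribute(
        push_block, offset, sizeof(push_block), api::kTensorSizes);
    offset += data->write_attribute(
        push_block, offset, sizeof(push_block), api::kTensorNumel);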

File tree: 2 files changed (+124 −34 lines)


backends/vulkan/runtime/api/containers/Tensor.cpp

Lines changed: 69 additions & 27 deletions
@@ -7,6 +7,7 @@
  */
 
 #include <executorch/backends/vulkan/runtime/api/containers/Tensor.h>
+#include <cstring>
 
 namespace vkcompute {
 namespace api {
@@ -446,11 +447,10 @@ vTensor::vTensor(
       dim_order_(calculate_dim_order(sizes_.size(), packed_dim_)),
       axis_map_(default_axis_map()),
       strides_(calculate_strides(sizes, dim_order_)),
-      numel_(utils::multiply_integers(sizes_)),
       padded_sizes_{calculate_padded_sizes(sizes, packed_dim_)},
-      unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
+      unsqueezed_strides_{
+          unsqueeze_strides(strides_, utils::multiply_integers(sizes_))},
       padded_numel_(utils::multiply_integers(padded_sizes_)),
-      logical_limits_{{0, 0, 0}},
       uniforms_(),
       // Utility Uniform Buffers that can be passed to shaders as arguments
       uniforms_size_(0),
@@ -467,6 +467,11 @@ vTensor::vTensor(
           padded_sizes_,
           dtype_,
           allocate_memory) {
+  uniform_data_ = std::make_shared<UniformData>(UniformData{
+      sizes_,
+      unsqueezed_strides_,
+      {{0, 0, 0}},
+      static_cast<size_t>(utils::multiply_integers(sizes_))});
   VK_CHECK_COND(
       dim_order_is_valid(dim_order_), "computed dim order is invalid");
 
@@ -494,11 +499,9 @@ vTensor::vTensor(
       dim_order_(),
       axis_map_(default_axis_map()),
       strides_(),
-      numel_(utils::multiply_integers(sizes_)),
       padded_sizes_(calculate_padded_sizes(sizes_, packed_dim_)),
       unsqueezed_strides_(),
       padded_numel_(utils::multiply_integers(padded_sizes_)),
-      logical_limits_(),
       uniforms_(),
       // Utility Uniform Buffers that can be passed to shaders as arguments
       uniforms_size_(0),
@@ -508,6 +511,11 @@ vTensor::vTensor(
       logical_limits_uniform_offset_(kUniformOffsetUnset),
       // Construct Tensor storage
       storage_(context, image) {
+  uniform_data_ = std::make_shared<UniformData>(UniformData{
+      sizes_,
+      {0, 0, 0, 0},
+      {{0, 0, 0}},
+      static_cast<size_t>(utils::multiply_integers(sizes_))});
   set_logical_limits(storage_.image_extents_);
 }
 
@@ -519,13 +527,11 @@ vTensor::vTensor(vTensor& other)
       dim_order_(other.dim_order_.begin(), other.dim_order_.end()),
       axis_map_(other.axis_map_.begin(), other.axis_map_.end()),
       strides_(other.strides_.begin(), other.strides_.end()),
-      numel_(other.numel_),
       padded_sizes_{other.padded_sizes_.begin(), other.padded_sizes_.end()},
       unsqueezed_strides_{
           other.unsqueezed_strides_.begin(),
           other.unsqueezed_strides_.end()},
       padded_numel_(other.padded_numel_),
-      logical_limits_{other.logical_limits_},
       uniforms_(),
       // Empty initialize Utility Uniform Buffers
       uniforms_size_(0),
@@ -534,7 +540,9 @@ vTensor::vTensor(vTensor& other)
       numel_uniform_offset_(kUniformOffsetUnset),
       logical_limits_uniform_offset_(kUniformOffsetUnset),
       // Copy Tensor storage
-      storage_(other.storage_) {}
+      storage_(other.storage_) {
+  uniform_data_ = std::make_shared<UniformData>(*other.get_uniform_data());
+}
 
 vTensor::vTensor(
     vTensor& other,
@@ -548,11 +556,10 @@ vTensor::vTensor(
       dim_order_(dim_order.begin(), dim_order.end()),
       axis_map_(default_axis_map()),
       strides_(calculate_strides(sizes_, dim_order_)),
-      numel_(utils::multiply_integers(sizes_)),
       padded_sizes_{calculate_padded_sizes(sizes, packed_dim_)},
-      unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
+      unsqueezed_strides_{
+          unsqueeze_strides(strides_, utils::multiply_integers(sizes_))},
       padded_numel_(utils::multiply_integers(padded_sizes_)),
-      logical_limits_(other.logical_limits_),
       uniforms_(),
       // Empty initialize Utility Uniform Buffers
       uniforms_size_(0),
@@ -562,14 +569,45 @@ vTensor::vTensor(
       logical_limits_uniform_offset_(kUniformOffsetUnset),
       // Copy Tensor storage
       storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) {
+  uniform_data_ = std::make_shared<UniformData>(UniformData{
+      sizes_,
+      unsqueezed_strides_,
+      {other.logical_limits()},
+      static_cast<size_t>(utils::multiply_integers(sizes_))});
+
   VK_CHECK_COND(
       dim_order_is_valid(dim_order_), "new dim order provided is invalid");
   VK_CHECK_COND(
-      offset_numel + numel_ <= other.numel(),
+      offset_numel + numel() <= other.numel(),
       "Tensor alias cannot access more elements than available in the original"
       "tensor");
 }
 
+uint32_t vTensor::UniformData::write_attribute(
+    void* dst,
+    const uint32_t dst_offset,
+    const uint32_t max_dst_size,
+    const Attribute attr) {
+#define WRITE_ATTRIBUTE_CASE(enum_name, member_name)                       \
+  case vTensor::Attribute::enum_name: {                                    \
+    VK_CHECK_COND(                                                         \
+        (dst_offset + sizeof(member_name)) <= max_dst_size,                \
+        "Attempting to write tensor attribute outside data boundary.");    \
+    memcpy((uint8_t*)dst + dst_offset, &member_name, sizeof(member_name)); \
+    return sizeof(member_name);                                            \
+  }
+  switch (attr) {
+    WRITE_ATTRIBUTE_CASE(SIZES, sizes_v);
+    WRITE_ATTRIBUTE_CASE(STRIDES, strides_v);
+    WRITE_ATTRIBUTE_CASE(LOGICAL_LIMITS, logical_limits);
+    WRITE_ATTRIBUTE_CASE(NUMEL, numel);
+    default:
+      VK_THROW("Invalid Attribute");
+  }
+#undef WRITE_ATTRIBUTE_CASE
+  return 0;
+}
+
 vkapi::VulkanImage& vTensor::image(
     vkapi::PipelineBarrier& pipeline_barrier,
     const vkapi::PipelineStageFlags stage) & {
@@ -601,9 +639,9 @@ vkapi::VulkanBuffer& vTensor::buffer(
 }
 
 void vTensor::set_logical_limits(const utils::uvec3& image_extents) {
-  logical_limits_.limits[0] = image_extents[axis_map_.at(0)];
-  logical_limits_.limits[1] = image_extents[axis_map_.at(1)];
-  logical_limits_.limits[2] = image_extents[axis_map_.at(2)];
+  uniform_data_->logical_limits.limits[0] = image_extents[axis_map_.at(0)];
+  uniform_data_->logical_limits.limits[1] = image_extents[axis_map_.at(1)];
+  uniform_data_->logical_limits.limits[2] = image_extents[axis_map_.at(2)];
 }
 
 utils::GPUMemoryLayout vTensor::estimate_memory_layout() const {
@@ -661,7 +699,7 @@ const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
         "Uniform data allocation has exceeded Tensor uniform buffer size");
     logical_limits_uniform_offset_ = uniforms_size_;
     uniforms_size_ += kSizePerUniform;
-    uniforms_.update(logical_limits_, logical_limits_uniform_offset_);
+    uniforms_.update(logical_limits(), logical_limits_uniform_offset_);
   }
   return vkapi::BufferBindInfo(
       uniforms_.buffer(), logical_limits_uniform_offset_);
@@ -677,7 +715,7 @@ const vkapi::BufferBindInfo vTensor::numel_ubo() {
         "Uniform data allocation has exceeded Tensor uniform buffer size");
     numel_uniform_offset_ = uniforms_size_;
     uniforms_size_ += kSizePerUniform;
-    uniforms_.update(numel_, numel_uniform_offset_);
+    uniforms_.update(numel(), numel_uniform_offset_);
   }
   return vkapi::BufferBindInfo(uniforms_.buffer(), numel_uniform_offset_);
 }
@@ -687,10 +725,10 @@ size_t vTensor::staging_buffer_numel() const {
   const bool int8_supported =
       storage_.context_->adapter_ptr()->has_full_int8_buffers_support();
   if (is_int8 && !int8_supported) {
-    return utils::align_up_4(numel_);
+    return utils::align_up_4(numel());
   }
   if (storage_type() == utils::kBuffer) {
-    return numel_;
+    return numel();
   }
   return padded_numel_;
 }
@@ -720,30 +758,32 @@ void vTensor::bind_allocation(const vkapi::Allocation& allocation) {
 
 void vTensor::update_metadata() {
   strides_ = calculate_strides(sizes_, dim_order_);
-  numel_ = utils::multiply_integers(sizes_);
+  uniform_data_->numel = utils::multiply_integers(sizes_);
 
   padded_sizes_ = calculate_padded_sizes(sizes_, packed_dim_);
-  unsqueezed_strides_ = unsqueeze_strides(strides_, numel_);
+  unsqueezed_strides_ = unsqueeze_strides(strides_, numel());
   padded_numel_ = utils::multiply_integers(padded_sizes_);
 
+  // Update uniform data if it has been modified
+  uniform_data_->sizes_v = utils::make_whcn_ivec4(sizes_);
+  uniform_data_->strides_v = utils::make_whcn_ivec4(unsqueezed_strides_);
+
   // Calculate the image extents that would have been used to allocate a texture
   // withthe current sizes, and use that to set the logical limits.
   set_logical_limits(
       calculate_image_extents(padded_sizes_, axis_map_, packed_dim_));
 
   if (sizes_uniform_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(utils::make_whcn_ivec4(sizes_), sizes_uniform_offset_);
+    uniforms_.update(uniform_data_->sizes_v, sizes_uniform_offset_);
   }
   if (unsqueezed_strides_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(
-        utils::make_whcn_ivec4(unsqueezed_strides_),
-        unsqueezed_strides_offset_);
+    uniforms_.update(uniform_data_->strides_v, unsqueezed_strides_offset_);
   }
   if (numel_uniform_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(numel_, numel_uniform_offset_);
+    uniforms_.update(numel(), numel_uniform_offset_);
   }
   if (logical_limits_uniform_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(logical_limits_, logical_limits_uniform_offset_);
+    uniforms_.update(logical_limits(), logical_limits_uniform_offset_);
  }
 }
 
@@ -796,6 +836,8 @@ void vTensor::virtual_clone(const vTensor& other) {
   dim_order_ = other.dim_order_;
   axis_map_ = other.axis_map_;
   packed_dim_ = other.packed_dim_;
+
+  *uniform_data_ = *other.get_uniform_data();
 }
 
 void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
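
A consequence of update_metadata() writing through uniform_data_ in the hunk above: any holder of the shared pointer observes resizes in place, with no re-registration. A short sketch, assuming a constructed api::vTensor t and that virtual_resize() refreshes metadata via update_metadata(); the shape values are illustrative:

    auto data = t.get_uniform_data(); // shared state, not a snapshot
    t.virtual_resize({1, 4, 8, 8});   // rewrites numel, sizes_v, strides_v, and
                                      // logical_limits inside *data
    // A later data->write_attribute(...) serializes the post-resize metadata.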

backends/vulkan/runtime/api/containers/Tensor.h

Lines changed: 55 additions & 7 deletions
@@ -229,6 +229,46 @@ class vTensor final {
   vTensor(vTensor&& other) = default;
   vTensor& operator=(vTensor&& other) = default;
 
+  enum class Attribute : uint8_t {
+    SIZES,
+    STRIDES,
+    LOGICAL_LIMITS,
+    NUMEL,
+  };
+
+  class UniformData {
+    utils::ivec4 sizes_v;
+    utils::ivec4 strides_v;
+    // See the comments documenting logical_limits() for more context.
+    TextureLimits logical_limits;
+    // Contains the number of elements in the tensor according to the canonical
+    // sizes.
+    size_t numel;
+
+    friend class vTensor;
+
+    UniformData(
+        const std::vector<int64_t>& sizes,
+        const std::vector<int64_t>& strides,
+        const TextureLimits& logical_limits,
+        const size_t numel)
+        : sizes_v(utils::make_whcn_ivec4(sizes)),
+          strides_v(utils::make_whcn_ivec4(strides)),
+          logical_limits(logical_limits),
+          numel(numel) {}
+
+   public:
+    /*
+     * Write tensor's metadata into dst, at the given dst_offset. max_dst_size
+     * is the size of dst and is used to avoid out of bounds writes.
+     */
+    uint32_t write_attribute(
+        void* dst,
+        const uint32_t dst_offset,
+        const uint32_t max_dst_size,
+        const Attribute attr);
+  };
+
  private:
   /*
    * "Core" tensor metadata. They are the minimum amount of information required
@@ -274,9 +314,6 @@ class vTensor final {
 
   // strides of the tensor in NCHW dimension order
   std::vector<int64_t> strides_;
-  // Contains the number of elements in the tensor according to the canonical
-  // sizes.
-  size_t numel_;
 
   /*
    * The below metadata members are derived from the above, and are typically
@@ -293,8 +330,6 @@ class vTensor final {
   // Contains the number of elements in the tensor according to the padded
   // sizes.
   size_t padded_numel_;
-  // See the comments documenting logical_limits() for more context.
-  TextureLimits logical_limits_;
 
   /*
    * Utility GPU buffer that can be passed to shaders in order to convey tensor
@@ -326,6 +361,8 @@ class vTensor final {
 
   vTensorStorage storage_;
 
+  std::shared_ptr<UniformData> uniform_data_;
+
  public:
   /*
     Texture Access
@@ -391,7 +428,7 @@ class vTensor final {
    * instead of the original sizes.
    */
   inline const utils::ivec3& logical_limits() const {
-    return logical_limits_.limits;
+    return uniform_data_->logical_limits.limits;
   }
 
   /*
@@ -501,7 +538,7 @@ class vTensor final {
   const vkapi::BufferBindInfo numel_ubo();
 
   inline size_t numel() const {
-    return numel_;
+    return uniform_data_->numel;
   }
 
   inline size_t nbytes() const {
@@ -589,7 +626,18 @@ class vTensor final {
   inline bool is_view_of(const vTensor& other) const {
     return storage_.is_copy_of(other.storage_);
   }
+
+  const std::shared_ptr<UniformData>& get_uniform_data() const {
+    return uniform_data_;
+  }
 };
 
+static constexpr vTensor::Attribute kTensorSizes = vTensor::Attribute::SIZES;
+static constexpr vTensor::Attribute kTensorStrides =
+    vTensor::Attribute::STRIDES;
+static constexpr vTensor::Attribute kTensorLogicalLimits =
+    vTensor::Attribute::LOGICAL_LIMITS;
+static constexpr vTensor::Attribute kTensorNumel = vTensor::Attribute::NUMEL;
+
 } // namespace api
 } // namespace vkcompute
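
Note the sharing asymmetry established above: the copy constructor clones UniformData (std::make_shared<UniformData>(*other.get_uniform_data())) while aliasing the underlying storage, so a view's metadata can diverge from its source even though both read the same memory. A sketch, assuming a constructed, non-const api::vTensor src:

    api::vTensor view(src); // copy ctor: storage_ aliased, UniformData cloned
    // view.get_uniform_data() != src.get_uniform_data() -> distinct metadata objects
    // view.is_view_of(src)                              -> same backing vTensorStorage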
