[ET-VK] Adding UniformData struct in vTensor class to store uniform data, which will be stored using shared ptr and can be shared with push constants. #7222

Merged
96 changes: 69 additions & 27 deletions backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -7,6 +7,7 @@
*/

#include <executorch/backends/vulkan/runtime/api/containers/Tensor.h>
+#include <cstring>

namespace vkcompute {
namespace api {
@@ -446,11 +447,10 @@ vTensor::vTensor(
dim_order_(calculate_dim_order(sizes_.size(), packed_dim_)),
axis_map_(default_axis_map()),
strides_(calculate_strides(sizes, dim_order_)),
-      numel_(utils::multiply_integers(sizes_)),
padded_sizes_{calculate_padded_sizes(sizes, packed_dim_)},
-      unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
+      unsqueezed_strides_{
+          unsqueeze_strides(strides_, utils::multiply_integers(sizes_))},
padded_numel_(utils::multiply_integers(padded_sizes_)),
-      logical_limits_{{0, 0, 0}},
uniforms_(),
// Utility Uniform Buffers that can be passed to shaders as arguments
uniforms_size_(0),
@@ -467,6 +467,11 @@
padded_sizes_,
dtype_,
allocate_memory) {
+  uniform_data_ = std::make_shared<UniformData>(UniformData{
+      sizes_,
+      unsqueezed_strides_,
+      {{0, 0, 0}},
+      static_cast<size_t>(utils::multiply_integers(sizes_))});
VK_CHECK_COND(
dim_order_is_valid(dim_order_), "computed dim order is invalid");

@@ -494,11 +499,9 @@ vTensor::vTensor(
dim_order_(),
axis_map_(default_axis_map()),
strides_(),
-      numel_(utils::multiply_integers(sizes_)),
padded_sizes_(calculate_padded_sizes(sizes_, packed_dim_)),
unsqueezed_strides_(),
padded_numel_(utils::multiply_integers(padded_sizes_)),
-      logical_limits_(),
uniforms_(),
// Utility Uniform Buffers that can be passed to shaders as arguments
uniforms_size_(0),
@@ -508,6 +511,11 @@
logical_limits_uniform_offset_(kUniformOffsetUnset),
// Construct Tensor storage
storage_(context, image) {
+  uniform_data_ = std::make_shared<UniformData>(UniformData{
+      sizes_,
+      {0, 0, 0, 0},
+      {{0, 0, 0}},
+      static_cast<size_t>(utils::multiply_integers(sizes_))});
set_logical_limits(storage_.image_extents_);
}

@@ -519,13 +527,11 @@ vTensor::vTensor(vTensor& other)
dim_order_(other.dim_order_.begin(), other.dim_order_.end()),
axis_map_(other.axis_map_.begin(), other.axis_map_.end()),
strides_(other.strides_.begin(), other.strides_.end()),
-      numel_(other.numel_),
padded_sizes_{other.padded_sizes_.begin(), other.padded_sizes_.end()},
unsqueezed_strides_{
other.unsqueezed_strides_.begin(),
other.unsqueezed_strides_.end()},
padded_numel_(other.padded_numel_),
-      logical_limits_{other.logical_limits_},
uniforms_(),
// Empty initialize Utility Uniform Buffers
uniforms_size_(0),
@@ -534,7 +540,9 @@ vTensor::vTensor(vTensor& other)
numel_uniform_offset_(kUniformOffsetUnset),
logical_limits_uniform_offset_(kUniformOffsetUnset),
// Copy Tensor storage
-      storage_(other.storage_) {}
+      storage_(other.storage_) {
+  uniform_data_ = std::make_shared<UniformData>(*other.get_uniform_data());
+}

vTensor::vTensor(
vTensor& other,
@@ -548,11 +556,10 @@ vTensor::vTensor(
dim_order_(dim_order.begin(), dim_order.end()),
axis_map_(default_axis_map()),
strides_(calculate_strides(sizes_, dim_order_)),
-      numel_(utils::multiply_integers(sizes_)),
padded_sizes_{calculate_padded_sizes(sizes, packed_dim_)},
-      unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
+      unsqueezed_strides_{
+          unsqueeze_strides(strides_, utils::multiply_integers(sizes_))},
padded_numel_(utils::multiply_integers(padded_sizes_)),
-      logical_limits_(other.logical_limits_),
uniforms_(),
// Empty initialize Utility Uniform Buffers
uniforms_size_(0),
@@ -562,14 +569,45 @@ vTensor::vTensor(
logical_limits_uniform_offset_(kUniformOffsetUnset),
// Copy Tensor storage
storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) {
+  uniform_data_ = std::make_shared<UniformData>(UniformData{
+      sizes_,
+      unsqueezed_strides_,
+      {other.logical_limits()},
+      static_cast<size_t>(utils::multiply_integers(sizes_))});

VK_CHECK_COND(
dim_order_is_valid(dim_order_), "new dim order provided is invalid");
VK_CHECK_COND(
-      offset_numel + numel_ <= other.numel(),
+      offset_numel + numel() <= other.numel(),
"Tensor alias cannot access more elements than available in the original"
"tensor");
}

+uint32_t vTensor::UniformData::write_attribute(
+    void* dst,
+    const uint32_t dst_offset,
+    const uint32_t max_dst_size,
+    const Attribute attr) {
+#define WRITE_ATTRIBUTE_CASE(enum_name, member_name)                       \
+  case vTensor::Attribute::enum_name: {                                    \
+    VK_CHECK_COND(                                                         \
+        (dst_offset + sizeof(member_name)) <= max_dst_size,                \
+        "Attempting to write tensor attribute outside data boundary.");    \
+    memcpy((uint8_t*)dst + dst_offset, &member_name, sizeof(member_name)); \
+    return sizeof(member_name);                                            \
+  }
+  switch (attr) {
+    WRITE_ATTRIBUTE_CASE(SIZES, sizes_v);
+    WRITE_ATTRIBUTE_CASE(STRIDES, strides_v);
+    WRITE_ATTRIBUTE_CASE(LOGICAL_LIMITS, logical_limits);
+    WRITE_ATTRIBUTE_CASE(NUMEL, numel);
+    default:
+      VK_THROW("Invalid Attribute");
+  }
+#undef WRITE_ATTRIBUTE_CASE
+  return 0;
+}
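For context, here is a minimal usage sketch of the new write_attribute API (not part of the diff: the tensor variable, the staging array, and its size are illustrative, and the snippet assumes it sits inside the vkcompute::api namespace). Each call bounds-checks the write and returns the number of bytes written, so attributes can be packed back to back into a CPU-side staging area destined for vkCmdPushConstants:

std::array<uint8_t, 128> push_constant_data; // hypothetical staging area
uint32_t offset = 0;

// Pack sizes, then strides, contiguously; the returned byte count
// advances the offset for the next attribute.
offset += tensor.get_uniform_data()->write_attribute(
    push_constant_data.data(), offset, push_constant_data.size(), kTensorSizes);
offset += tensor.get_uniform_data()->write_attribute(
    push_constant_data.data(), offset, push_constant_data.size(), kTensorStrides);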

vkapi::VulkanImage& vTensor::image(
vkapi::PipelineBarrier& pipeline_barrier,
const vkapi::PipelineStageFlags stage) & {
@@ -601,9 +639,9 @@ vkapi::VulkanBuffer& vTensor::buffer(
}

void vTensor::set_logical_limits(const utils::uvec3& image_extents) {
-  logical_limits_.limits[0] = image_extents[axis_map_.at(0)];
-  logical_limits_.limits[1] = image_extents[axis_map_.at(1)];
-  logical_limits_.limits[2] = image_extents[axis_map_.at(2)];
+  uniform_data_->logical_limits.limits[0] = image_extents[axis_map_.at(0)];
+  uniform_data_->logical_limits.limits[1] = image_extents[axis_map_.at(1)];
+  uniform_data_->logical_limits.limits[2] = image_extents[axis_map_.at(2)];
}

utils::GPUMemoryLayout vTensor::estimate_memory_layout() const {
@@ -661,7 +699,7 @@ const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
"Uniform data allocation has exceeded Tensor uniform buffer size");
logical_limits_uniform_offset_ = uniforms_size_;
uniforms_size_ += kSizePerUniform;
-    uniforms_.update(logical_limits_, logical_limits_uniform_offset_);
+    uniforms_.update(logical_limits(), logical_limits_uniform_offset_);
}
return vkapi::BufferBindInfo(
uniforms_.buffer(), logical_limits_uniform_offset_);
@@ -677,7 +715,7 @@ const vkapi::BufferBindInfo vTensor::numel_ubo() {
"Uniform data allocation has exceeded Tensor uniform buffer size");
numel_uniform_offset_ = uniforms_size_;
uniforms_size_ += kSizePerUniform;
-    uniforms_.update(numel_, numel_uniform_offset_);
+    uniforms_.update(numel(), numel_uniform_offset_);
}
return vkapi::BufferBindInfo(uniforms_.buffer(), numel_uniform_offset_);
}
@@ -687,10 +725,10 @@ size_t vTensor::staging_buffer_numel() const {
const bool int8_supported =
storage_.context_->adapter_ptr()->has_full_int8_buffers_support();
if (is_int8 && !int8_supported) {
-    return utils::align_up_4(numel_);
+    return utils::align_up_4(numel());
}
if (storage_type() == utils::kBuffer) {
-    return numel_;
+    return numel();
}
return padded_numel_;
}
Expand Down Expand Up @@ -720,30 +758,32 @@ void vTensor::bind_allocation(const vkapi::Allocation& allocation) {

void vTensor::update_metadata() {
strides_ = calculate_strides(sizes_, dim_order_);
-  numel_ = utils::multiply_integers(sizes_);
+  uniform_data_->numel = utils::multiply_integers(sizes_);

padded_sizes_ = calculate_padded_sizes(sizes_, packed_dim_);
-  unsqueezed_strides_ = unsqueeze_strides(strides_, numel_);
+  unsqueezed_strides_ = unsqueeze_strides(strides_, numel());
padded_numel_ = utils::multiply_integers(padded_sizes_);

+  // Update uniform data if it has been modified
+  uniform_data_->sizes_v = utils::make_whcn_ivec4(sizes_);
+  uniform_data_->strides_v = utils::make_whcn_ivec4(unsqueezed_strides_);

// Calculate the image extents that would have been used to allocate a texture
  // with the current sizes, and use that to set the logical limits.
set_logical_limits(
calculate_image_extents(padded_sizes_, axis_map_, packed_dim_));

if (sizes_uniform_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(utils::make_whcn_ivec4(sizes_), sizes_uniform_offset_);
+    uniforms_.update(uniform_data_->sizes_v, sizes_uniform_offset_);
}
if (unsqueezed_strides_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(
-        utils::make_whcn_ivec4(unsqueezed_strides_),
-        unsqueezed_strides_offset_);
+    uniforms_.update(uniform_data_->strides_v, unsqueezed_strides_offset_);
}
if (numel_uniform_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(numel_, numel_uniform_offset_);
+    uniforms_.update(numel(), numel_uniform_offset_);
}
if (logical_limits_uniform_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(logical_limits_, logical_limits_uniform_offset_);
+    uniforms_.update(logical_limits(), logical_limits_uniform_offset_);
}
}

Expand Down Expand Up @@ -796,6 +836,8 @@ void vTensor::virtual_clone(const vTensor& other) {
dim_order_ = other.dim_order_;
axis_map_ = other.axis_map_;
packed_dim_ = other.packed_dim_;

+  *uniform_data_ = *other.get_uniform_data();
}

void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
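Why the metadata lives behind a shared_ptr rather than by value: update_metadata() mutates the shared UniformData in place, so a consumer that captured the pointer keeps observing current values across resizes. A small sketch of these semantics (the tensor variable and sizes are assumed for illustration; not part of the diff):

// Capture the shared metadata once.
std::shared_ptr<api::vTensor::UniformData> meta = tensor.get_uniform_data();

tensor.virtual_resize({2, 8, 8}); // update_metadata() rewrites *meta in place

// The captured pointer now reflects the new element count (2 * 8 * 8 = 128).
size_t numel = 0;
meta->write_attribute(&numel, /*dst_offset=*/0, sizeof(numel), api::kTensorNumel);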
62 changes: 55 additions & 7 deletions backends/vulkan/runtime/api/containers/Tensor.h
@@ -229,6 +229,46 @@ class vTensor final {
vTensor(vTensor&& other) = default;
vTensor& operator=(vTensor&& other) = default;

+  enum class Attribute : uint8_t {
+    SIZES,
+    STRIDES,
+    LOGICAL_LIMITS,
+    NUMEL,
+  };
+
+  class UniformData {
+    utils::ivec4 sizes_v;
+    utils::ivec4 strides_v;
+    // See the comments documenting logical_limits() for more context.
+    TextureLimits logical_limits;
+    // Contains the number of elements in the tensor according to the canonical
+    // sizes.
+    size_t numel;
+
+    friend class vTensor;
+
+    UniformData(
+        const std::vector<int64_t>& sizes,
+        const std::vector<int64_t>& strides,
+        const TextureLimits& logical_limits,
+        const size_t numel)
+        : sizes_v(utils::make_whcn_ivec4(sizes)),
+          strides_v(utils::make_whcn_ivec4(strides)),
+          logical_limits(logical_limits),
+          numel(numel) {}
+
+   public:
+    /*
+     * Write tensor's metadata into dst, at the given dst_offset. max_dst_size
+     * is the size of dst and is used to avoid out of bounds writes.
+     */
+    uint32_t write_attribute(
+        void* dst,
+        const uint32_t dst_offset,
+        const uint32_t max_dst_size,
+        const Attribute attr);
+  };
+
private:
/*
* "Core" tensor metadata. They are the minimum amount of information required
@@ -274,9 +314,6 @@

// strides of the tensor in NCHW dimension order
std::vector<int64_t> strides_;
-  // Contains the number of elements in the tensor according to the canonical
-  // sizes.
-  size_t numel_;

/*
* The below metadata members are derived from the above, and are typically
@@ -293,8 +330,6 @@
// Contains the number of elements in the tensor according to the padded
// sizes.
size_t padded_numel_;
-  // See the comments documenting logical_limits() for more context.
-  TextureLimits logical_limits_;

/*
* Utility GPU buffer that can be passed to shaders in order to convey tensor
@@ -326,6 +361,8 @@

vTensorStorage storage_;

+  std::shared_ptr<UniformData> uniform_data_;

public:
/*
Texture Access
@@ -391,7 +428,7 @@
* instead of the original sizes.
*/
inline const utils::ivec3& logical_limits() const {
-    return logical_limits_.limits;
+    return uniform_data_->logical_limits.limits;
}

/*
@@ -501,7 +538,7 @@
const vkapi::BufferBindInfo numel_ubo();

inline size_t numel() const {
-    return numel_;
+    return uniform_data_->numel;
}

inline size_t nbytes() const {
@@ -589,7 +626,18 @@
inline bool is_view_of(const vTensor& other) const {
return storage_.is_copy_of(other.storage_);
}

+  const std::shared_ptr<UniformData>& get_uniform_data() const {
+    return uniform_data_;
+  }
};

+static constexpr vTensor::Attribute kTensorSizes = vTensor::Attribute::SIZES;
+static constexpr vTensor::Attribute kTensorStrides =
+    vTensor::Attribute::STRIDES;
+static constexpr vTensor::Attribute kTensorLogicalLimits =
+    vTensor::Attribute::LOGICAL_LIMITS;
+static constexpr vTensor::Attribute kTensorNumel = vTensor::Attribute::NUMEL;

} // namespace api
} // namespace vkcompute
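
As a closing illustration, here is a hypothetical push-constant provider built on top of this design (the class below is an assumption for illustration, not an API added by this PR). Holding the shared_ptr defers the byte copy to command-recording time, so the latest sizes/strides/numel are captured even if the tensor is virtually resized after the provider is created:

#include <executorch/backends/vulkan/runtime/api/containers/Tensor.h>

namespace vkcompute {

// Hypothetical provider: binds one tensor attribute to a push constant slot.
class TensorMetadataPushConstant {
 public:
  TensorMetadataPushConstant(
      const api::vTensor& tensor,
      const api::vTensor::Attribute attr)
      : data_(tensor.get_uniform_data()), attr_(attr) {}

  // Copies the attribute's current value into dst at dst_offset and returns
  // the number of bytes written; intended to be called at command-recording
  // time so that the freshest metadata is serialized.
  uint32_t write(void* dst, uint32_t dst_offset, uint32_t max_dst_size) const {
    return data_->write_attribute(dst, dst_offset, max_dst_size, attr_);
  }

 private:
  std::shared_ptr<api::vTensor::UniformData> data_;
  api::vTensor::Attribute attr_;
};

} // namespace vkcompute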