Skip to content

[ET-VK] Introduce copy constructor for vTensor to allow for zero-copy… #4791

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions backends/vulkan/runtime/api/containers/Tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,44 @@
namespace vkcompute {
namespace api {

/*
 * Given the strides of a buffer-backed tensor, find the index of the "fastest
 * moving" dimension in WHCN dimension order. If multiple dims have the lowest
 * stride, then the "earlier" dim is assumed to be the fastest moving (width is
 * "earlier" than height).
 */
int32_t find_fastest_whcn_dim(const std::vector<int64_t>& strides) {
  if (strides.size() == 0) {
    return 0;
  }
  // Seed the search with the last dim (width in WHCN order) and scan from the
  // back with a strict comparison. This way, when several dims share the
  // lowest stride, the largest dim index — i.e. the "earliest" WHCN dim —
  // wins, matching the tie-breaking rule documented above. Seeding with dim 0
  // would break that rule whenever dim 0 ties for the lowest stride (e.g.
  // strides {1, 1} would resolve to the last WHCN dim instead of width).
  int32_t fastest_dim = static_cast<int32_t>(strides.size()) - 1;
  int64_t min_stride = strides.at(fastest_dim);
  for (int32_t d = fastest_dim - 1; d >= 0; --d) {
    if (strides.at(d) < min_stride) {
      fastest_dim = d;
      min_stride = strides.at(d);
    }
  }
  // Convert the NCHW-order dim index to a WHCN-order dim index.
  return (static_cast<int32_t>(strides.size()) - 1 - fastest_dim);
}

/*
 * Given the strides of a buffer-backed tensor, estimate the equivalent memory
 * layout enum value by identifying the fastest moving dimension. Throws if the
 * fastest moving dim does not correspond to a known GPUMemoryLayout value.
 */
utils::GPUMemoryLayout estimate_memory_layout(
    const std::vector<int64_t>& strides) {
  int32_t fastest_dim = find_fastest_whcn_dim(strides);
  // GPUMemoryLayout enum values 0..3 correspond directly to the fastest moving
  // WHCN dim index, so the index can be cast to the enum directly.
  if (fastest_dim <= 3) {
    return utils::GPUMemoryLayout(fastest_dim);
  }

  // TODO(ssjia) find a way to gracefully recover from this case by e.g. adding
  // an UNKNOWN GPUMemoryLayout. This is not high priority though because we
  // don't expect this to ever come up in practice.
  VK_THROW("No compatible GPUMemoryLayout value");
}

std::vector<int64_t> calculate_strides(
const std::vector<int64_t>& sizes,
const utils::GPUMemoryLayout memory_layout) {
Expand Down Expand Up @@ -166,6 +204,34 @@ vTensor::vTensor(
}
}

/*
 * Creates a "view" of another vTensor: the new tensor shares the underlying
 * storage of `other` but carries its own sizes/strides metadata. Only valid
 * for buffer-backed tensors (enforced by the vTensorStorage copy constructor).
 * `offset_numel` shifts the start of the aliased region by that many elements
 * into the original tensor's buffer.
 */
vTensor::vTensor(
    const vTensor& other,
    const std::vector<int64_t>& sizes,
    const std::vector<int64_t>& strides,
    const size_t offset_numel)
    : dtype_(other.dtype_),
      memory_layout_(estimate_memory_layout(strides)),
      // Copy tensor size metadata
      sizes_(sizes.begin(), sizes.end()),
      strides_(strides.begin(), strides.end()),
      numel_(utils::multiply_integers(sizes_)),
      padded_sizes_{calculate_padded_sizes(sizes, memory_layout_)},
      unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
      padded_numel_(utils::multiply_integers(padded_sizes_)),
      texture_limits_{{0, 0, 0}},
      // Empty initialize Utility Uniform Buffers
      sizes_uniform_(),
      strides_uniform_(),
      numel_uniform_(),
      texture_limits_uniform_(),
      // Copy Tensor storage; the element offset is converted to a byte offset
      storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) {
  // Guard against the view (including its element offset) extending past the
  // end of the original tensor's buffer. Note the trailing space inside the
  // first string fragment, which keeps the concatenated message readable.
  VK_CHECK_COND(
      offset_numel + numel_ <= other.numel(),
      "Tensor alias cannot access more elements than available in the original "
      "tensor");
}

vkapi::VulkanImage& vTensor::image(
vkapi::PipelineBarrier& pipeline_barrier,
const vkapi::PipelineStageFlags stage) & {
Expand Down Expand Up @@ -428,6 +494,21 @@ vTensorStorage::vTensorStorage(
allocate_memory)),
last_access_{} {}

/*
 * Shallow-copy constructor: creates a non-owning alias of `other`'s buffer,
 * with the aliased region starting `buffer_offset` bytes into the original
 * buffer. Throws for texture-backed storage, which cannot be aliased this way.
 */
vTensorStorage::vTensorStorage(
    const vTensorStorage& other,
    const size_t buffer_offset)
    : context_(other.context_),
      storage_type_{other.storage_type_},
      image_extents_(other.image_extents_),
      buffer_length_{other.buffer_length_},
      // Only buffer storage can be copied, so the image is empty-initialized
      image_(),
      // The VulkanBuffer copy constructor produces a non-owning copy
      buffer_(other.buffer_, buffer_offset),
      last_access_{other.last_access_} {
  // NOTE(review): this check runs after the member init list above; throwing
  // here aborts construction, so no usable copy is ever produced for textures.
  if (other.storage_type_ != utils::kBuffer) {
    VK_THROW("Tensors with texture storage cannot be copied!");
  }
}

vTensorStorage::~vTensorStorage() {
  // Delegate all resource teardown to flush(); see flush() for details.
  flush();
}
Expand Down
30 changes: 29 additions & 1 deletion backends/vulkan/runtime/api/containers/Tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,19 @@ class vTensorStorage final {
const vkapi::ScalarType dtype,
const bool allocate_memory = true);

vTensorStorage(const vTensorStorage& other) = delete;
protected:
/*
* This allows for creation of tensors that use the same underlying storage
* as another tensor. Note that this functionality is currently enabled for
* tensors that have buffer storage only. The created tensor will not have
* ownership of the underlying VkBuffer. This constructor is marked protected
* because this behaviour is unsafe, since the original tensor may be
* destroyed before the copy is destroyed.
*/
vTensorStorage(const vTensorStorage& other, const size_t buffer_offset = 0);

public:
// To discourage creating copies, the assignment operator is still deleted.
vTensorStorage& operator=(const vTensorStorage& other) = delete;

vTensorStorage(vTensorStorage&& other) = default;
Expand Down Expand Up @@ -158,6 +170,22 @@ class vTensor final {
vTensor(const vTensor& other) = delete;
vTensor& operator=(const vTensor& other) = delete;

/*
* This constructor allows for the creation of a vTensor that references the
* same buffer resource of another vTensor, but with different sizes and
strides metadata. The created vTensor will not own the underlying
* resource. This is only applicable for buffer backed tensors at the moment.
*
* The offset_numel argument allows the aliased tensor's memory region to
* begin at an offset of N elements from the start of the original tensor's
* buffer.
*/
vTensor(
const vTensor& other,
const std::vector<int64_t>& sizes,
const std::vector<int64_t>& strides,
const size_t offset_numel = 0);

vTensor(vTensor&& other) = default;
vTensor& operator=(vTensor&& other) = default;

Expand Down
11 changes: 11 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,17 @@ ValueRef ComputeGraph::add_tensor(
sizes, dtype, suggested_memory_layout(sizes), shared_object_idx);
}

/*
 * Appends a new vTensor value that is a non-owning "view" of the tensor at
 * `vref`, constructed via the vTensor copy constructor with the provided
 * sizes, strides, and element offset. Returns a reference to the new value.
 */
ValueRef ComputeGraph::add_tensor_view(
    const ValueRef vref,
    const std::vector<int64_t>& sizes,
    const std::vector<int64_t>& strides,
    const size_t offset_numel) {
  const vTensorPtr src_tensor = get_tensor(vref);
  // The new value is appended at the end of the values list, so its index is
  // the current list size.
  const ValueRef view_ref(static_cast<int>(values_.size()));
  values_.emplace_back(api::vTensor(*src_tensor, sizes, strides, offset_numel));
  return view_ref;
}

ValueRef ComputeGraph::add_tensorref(
const std::vector<int64_t>& sizes,
const vkapi::ScalarType dtype,
Expand Down
11 changes: 11 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,17 @@ class ComputeGraph final {
const ValueRef vref,
const utils::GPUMemoryLayout memory_layout);

/*
* Use the copy constructor of `api::vTensor` to create a "view" of the
* `vTensor` value at `vref`. See the copy constructor of `api::vTensor` for
* more details.
*/
ValueRef add_tensor_view(
const ValueRef vref,
const std::vector<int64_t>& sizes,
const std::vector<int64_t>& strides,
const size_t offset_numel = 0);

/*
* Add a `TensorRef` value to the graph with the specific properties. A
* `TensorRef` is a reference to a `api::vTensor` whose data is stored in an
Expand Down
22 changes: 18 additions & 4 deletions backends/vulkan/runtime/vk_api/memory/Allocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ Allocation::Allocation()
: memory_requirements{},
create_info{},
allocator(VK_NULL_HANDLE),
allocation(VK_NULL_HANDLE) {}
allocation(VK_NULL_HANDLE),
is_copy_(false) {}

Allocation::Allocation(
VmaAllocator vma_allocator,
Expand All @@ -38,16 +39,25 @@ Allocation::Allocation(
: memory_requirements(mem_props),
create_info(create_info),
allocator(vma_allocator),
allocation(VK_NULL_HANDLE) {
allocation(VK_NULL_HANDLE),
is_copy_(false) {
VK_CHECK(vmaAllocateMemory(
allocator, &memory_requirements, &create_info, &allocation, nullptr));
}

/*
 * Copy constructor: produces a non-owning alias of `other`'s VmaAllocation.
 * The original instance retains ownership of the underlying memory.
 */
Allocation::Allocation(const Allocation& other) noexcept
    : memory_requirements(other.memory_requirements),
      create_info(other.create_info),
      allocator(other.allocator),
      allocation(other.allocation),
      // Mark this instance as a copy so the destructor does not free the
      // shared VmaAllocation.
      is_copy_(true) {}

/*
 * Move constructor: takes over `other`'s allocation handle, preserving its
 * ownership status (is_copy_), and nulls out the source handle.
 */
Allocation::Allocation(Allocation&& other) noexcept
    : memory_requirements(other.memory_requirements),
      create_info(other.create_info),
      allocator(other.allocator),
      allocation(other.allocation),
      is_copy_(other.is_copy_) {
  // Null out the source handle so its destructor will not free the memory.
  other.allocation = VK_NULL_HANDLE;
}

Expand All @@ -58,14 +68,18 @@ Allocation& Allocation::operator=(Allocation&& other) noexcept {
create_info = other.create_info;
allocator = other.allocator;
allocation = other.allocation;
is_copy_ = other.is_copy_;

other.allocation = tmp_allocation;

return *this;
}

Allocation::~Allocation() {
  // Do not destroy the VmaAllocation if this class instance is a copy of some
  // other class instance, since this means that this class instance does not
  // have ownership of the underlying resource.
  if (VK_NULL_HANDLE != allocation && !is_copy_) {
    vmaFreeMemory(allocator, allocation);
  }
}
Expand Down
30 changes: 29 additions & 1 deletion backends/vulkan/runtime/vk_api/memory/Allocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,23 @@ struct Allocation final {
const VkMemoryRequirements&,
const VmaAllocationCreateInfo&);

Allocation(const Allocation&) = delete;
protected:
/*
* The copy constructor allows for the creation of class instances that are
* "aliases" of another class instance. The resulting class instance will not
* have ownership of the underlying VmaAllocation.
*
* This behaviour is analogous to creating a copy of a pointer, thus it is
* unsafe, as the original class instance may be destroyed before the copy.
* These constructors are therefore marked protected so that they may be used
* only in situations where the lifetime of the original class instance is
* guaranteed to exceed, or at least be the same as, the lifetime of the
* copied class instance.
*/
Allocation(const Allocation&) noexcept;

public:
// To discourage creating copies, the assignment operator is still deleted.
Allocation& operator=(const Allocation&) = delete;

Allocation(Allocation&&) noexcept;
Expand All @@ -47,9 +63,21 @@ struct Allocation final {
// Handles to the allocated memory
VmaAllocation allocation;

private:
// Indicates whether this class instance is a copy of another class instance,
// in which case it does not have ownership of the underlying VmaAllocation
bool is_copy_;

public:
  // True when this instance holds a live VmaAllocation handle, regardless of
  // whether the handle is owned or merely aliased (is_copy_).
  operator bool() const {
    return (allocation != VK_NULL_HANDLE);
  }

  // Returns whether this instance is a non-owning copy of another Allocation.
  inline bool is_copy() const {
    return is_copy_;
  }

friend class VulkanBuffer;
};

} // namespace vkapi
Expand Down
26 changes: 25 additions & 1 deletion backends/vulkan/runtime/vk_api/memory/Buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ VulkanBuffer::VulkanBuffer()
allocator_(VK_NULL_HANDLE),
memory_{},
owns_memory_(false),
is_copy_(false),
handle_(VK_NULL_HANDLE) {}

VulkanBuffer::VulkanBuffer(
Expand All @@ -37,6 +38,7 @@ VulkanBuffer::VulkanBuffer(
allocator_(vma_allocator),
memory_{},
owns_memory_(allocate_memory),
is_copy_(false),
handle_(VK_NULL_HANDLE) {
// If the buffer size is 0, allocate a buffer with a size of 1 byte. This is
// to ensure that there will be some resource that can be bound to a shader.
Expand Down Expand Up @@ -74,11 +76,29 @@ VulkanBuffer::VulkanBuffer(
}
}

/*
 * Copy constructor: creates a non-owning alias of `other`'s VkBuffer.
 * `offset` is added to the aliased buffer's memory offset, and `range` (when
 * not VK_WHOLE_SIZE) overrides the aliased buffer's memory range.
 */
VulkanBuffer::VulkanBuffer(
    const VulkanBuffer& other,
    const VkDeviceSize offset,
    const VkDeviceSize range) noexcept
    : buffer_properties_(other.buffer_properties_),
      allocator_(other.allocator_),
      // The Allocation copy constructor marks memory_ as a non-owning copy
      memory_(other.memory_),
      // owns_memory_ mirrors the original, but is_copy_ takes precedence in
      // the destructor, so a copy never frees the underlying resource
      owns_memory_(other.owns_memory_),
      is_copy_(true),
      handle_(other.handle_) {
  // TODO: set the offset and range appropriately
  buffer_properties_.mem_offset = other.buffer_properties_.mem_offset + offset;
  if (range != VK_WHOLE_SIZE) {
    buffer_properties_.mem_range = range;
  }
}

/*
 * Move constructor: takes over `other`'s buffer handle and memory, preserving
 * ownership and copy status, and nulls out the source handle.
 */
VulkanBuffer::VulkanBuffer(VulkanBuffer&& other) noexcept
    : buffer_properties_(other.buffer_properties_),
      allocator_(other.allocator_),
      memory_(std::move(other.memory_)),
      owns_memory_(other.owns_memory_),
      is_copy_(other.is_copy_),
      handle_(other.handle_) {
  // Null out the source handle so its destructor does not release the buffer.
  other.handle_ = VK_NULL_HANDLE;
}
Expand All @@ -91,6 +111,7 @@ VulkanBuffer& VulkanBuffer::operator=(VulkanBuffer&& other) noexcept {
allocator_ = other.allocator_;
memory_ = std::move(other.memory_);
owns_memory_ = other.owns_memory_;
is_copy_ = other.is_copy_;
handle_ = other.handle_;

other.handle_ = tmp_buffer;
Expand All @@ -100,7 +121,10 @@ VulkanBuffer& VulkanBuffer::operator=(VulkanBuffer&& other) noexcept {
}

VulkanBuffer::~VulkanBuffer() {
if (VK_NULL_HANDLE != handle_) {
// Do not destroy the VkBuffer if this class instance is a copy of another
// class instance, since this means that this class instance does not have
// ownership of the underlying resource.
if (VK_NULL_HANDLE != handle_ && !is_copy_) {
if (owns_memory_) {
vmaDestroyBuffer(allocator_, handle_, memory_.allocation);
} else {
Expand Down
Loading
Loading