Skip to content

[ET-VK] Introduce copy constructor for vTensor to allow for zero-copy… #4791

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions backends/vulkan/runtime/api/containers/Tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,44 @@
namespace vkcompute {
namespace api {

/*
 * Given the strides of a buffer-backed tensor, find the index of the "fastest
 * moving" dimension in WHCN dimension order. If multiple dims have the lowest
 * stride, then the "earlier" dim is assumed to be the fastest moving (width is
 * "earlier" than height).
 */
int32_t find_fastest_whcn_dim(const std::vector<int64_t>& strides) {
  if (strides.size() == 0) {
    return 0;
  }
  // Seed the search with the last dim (width in WHCN order) and scan from the
  // back with a strict comparison. This way, when several dims share the
  // lowest stride, the largest dim index — i.e. the "earliest" WHCN dim —
  // wins, matching the tie-breaking rule documented above. Seeding with dim 0
  // would break that rule whenever dim 0 ties for the lowest stride (e.g.
  // strides {1, 1} would resolve to the last WHCN dim instead of width).
  int32_t fastest_dim = static_cast<int32_t>(strides.size()) - 1;
  int64_t min_stride = strides.at(fastest_dim);
  for (int32_t d = fastest_dim - 1; d >= 0; --d) {
    if (strides.at(d) < min_stride) {
      fastest_dim = d;
      min_stride = strides.at(d);
    }
  }
  // Convert the NCHW-order dim index to a WHCN-order dim index.
  return (static_cast<int32_t>(strides.size()) - 1 - fastest_dim);
}

/*
 * Given the strides of a buffer-backed tensor, estimate the equivalent memory
 * layout enum value by identifying the fastest moving dimension. Throws if the
 * fastest moving dim does not correspond to a known GPUMemoryLayout value.
 */
utils::GPUMemoryLayout estimate_memory_layout(
    const std::vector<int64_t>& strides) {
  int32_t fastest_dim = find_fastest_whcn_dim(strides);
  // GPUMemoryLayout enum values 0..3 correspond directly to the fastest moving
  // WHCN dim index, so the index can be cast to the enum directly.
  if (fastest_dim <= 3) {
    return utils::GPUMemoryLayout(fastest_dim);
  }

  // TODO(ssjia) find a way to gracefully recover from this case by e.g. adding
  // an UNKNOWN GPUMemoryLayout. This is not high priority though because we
  // don't expect this to ever come up in practice.
  VK_THROW("No compatible GPUMemoryLayout value");
}

std::vector<int64_t> calculate_strides(
const std::vector<int64_t>& sizes,
const utils::GPUMemoryLayout memory_layout) {
Expand Down Expand Up @@ -166,6 +204,34 @@ vTensor::vTensor(
}
}

/*
 * Creates a "view" of another vTensor: the new tensor shares the underlying
 * storage of `other` but carries its own sizes/strides metadata. Only valid
 * for buffer-backed tensors (enforced by the vTensorStorage copy constructor).
 * `offset_numel` shifts the start of the aliased region by that many elements
 * into the original tensor's buffer.
 */
vTensor::vTensor(
    const vTensor& other,
    const std::vector<int64_t>& sizes,
    const std::vector<int64_t>& strides,
    const size_t offset_numel)
    : dtype_(other.dtype_),
      memory_layout_(estimate_memory_layout(strides)),
      // Copy tensor size metadata
      sizes_(sizes.begin(), sizes.end()),
      strides_(strides.begin(), strides.end()),
      numel_(utils::multiply_integers(sizes_)),
      padded_sizes_{calculate_padded_sizes(sizes, memory_layout_)},
      unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
      padded_numel_(utils::multiply_integers(padded_sizes_)),
      texture_limits_{{0, 0, 0}},
      // Empty initialize Utility Uniform Buffers
      sizes_uniform_(),
      strides_uniform_(),
      numel_uniform_(),
      texture_limits_uniform_(),
      // Copy Tensor storage; the element offset is converted to a byte offset
      storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) {
  // Guard against the view (including its element offset) extending past the
  // end of the original tensor's buffer. Note the trailing space inside the
  // first string fragment, which keeps the concatenated message readable.
  VK_CHECK_COND(
      offset_numel + numel_ <= other.numel(),
      "Tensor alias cannot access more elements than available in the original "
      "tensor");
}

vkapi::VulkanImage& vTensor::image(
vkapi::PipelineBarrier& pipeline_barrier,
const vkapi::PipelineStageFlags stage) & {
Expand Down Expand Up @@ -428,6 +494,21 @@ vTensorStorage::vTensorStorage(
allocate_memory)),
last_access_{} {}

/*
 * Shallow-copy constructor: creates a non-owning alias of `other`'s buffer,
 * with the aliased region starting `buffer_offset` bytes into the original
 * buffer. Throws for texture-backed storage, which cannot be aliased this way.
 */
vTensorStorage::vTensorStorage(
    const vTensorStorage& other,
    const size_t buffer_offset)
    : context_(other.context_),
      storage_type_{other.storage_type_},
      image_extents_(other.image_extents_),
      buffer_length_{other.buffer_length_},
      // Only buffer storage can be copied, so the image is empty-initialized
      image_(),
      // The VulkanBuffer copy constructor produces a non-owning copy
      buffer_(other.buffer_, buffer_offset),
      last_access_{other.last_access_} {
  // NOTE(review): this check runs after the member init list above; throwing
  // here aborts construction, so no usable copy is ever produced for textures.
  if (other.storage_type_ != utils::kBuffer) {
    VK_THROW("Tensors with texture storage cannot be copied!");
  }
}

vTensorStorage::~vTensorStorage() {
  // Delegate all resource teardown to flush(); see flush() for details.
  flush();
}
Expand Down
30 changes: 29 additions & 1 deletion backends/vulkan/runtime/api/containers/Tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,19 @@ class vTensorStorage final {
const vkapi::ScalarType dtype,
const bool allocate_memory = true);

vTensorStorage(const vTensorStorage& other) = delete;
protected:
/*
* This allows for creation of tensors that use the same underlying storage
* as another tensor. Note that this functionality is currently enabled for
* tensors that have buffer storage only. The created tensor will not have
* ownership of the underlying VkBuffer. This constructor is marked protected
* because this behaviour is unsafe, since the original tensor may be
* destroyed before the copy is destroyed.
*/
vTensorStorage(const vTensorStorage& other, const size_t buffer_offset = 0);

public:
// To discourage creating copies, the assignment operator is still deleted.
vTensorStorage& operator=(const vTensorStorage& other) = delete;

vTensorStorage(vTensorStorage&& other) = default;
Expand Down Expand Up @@ -158,6 +170,22 @@ class vTensor final {
vTensor(const vTensor& other) = delete;
vTensor& operator=(const vTensor& other) = delete;

/*
* This constructor allows for the creation of a vTensor that references the
* same buffer resource of another vTensor, but with different sizes and
strides metadata. The created vTensor will not own the underlying
* resource. This is only applicable for buffer backed tensors at the moment.
*
* The offset_numel argument allows the aliased tensor's memory region to
* begin at an offset of N elements from the start of the original tensor's
* buffer.
*/
vTensor(
const vTensor& other,
const std::vector<int64_t>& sizes,
const std::vector<int64_t>& strides,
const size_t offset_numel = 0);

vTensor(vTensor&& other) = default;
vTensor& operator=(vTensor&& other) = default;

Expand Down
11 changes: 11 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,17 @@ ValueRef ComputeGraph::add_tensor(
sizes, dtype, suggested_memory_layout(sizes), shared_object_idx);
}

/*
 * Appends a new vTensor value that is a non-owning "view" of the tensor at
 * `vref`, constructed via the vTensor copy constructor with the provided
 * sizes, strides, and element offset. Returns a reference to the new value.
 */
ValueRef ComputeGraph::add_tensor_view(
    const ValueRef vref,
    const std::vector<int64_t>& sizes,
    const std::vector<int64_t>& strides,
    const size_t offset_numel) {
  const vTensorPtr src_tensor = get_tensor(vref);
  // The new value is appended at the end of the values list, so its index is
  // the current list size.
  const ValueRef view_ref(static_cast<int>(values_.size()));
  values_.emplace_back(api::vTensor(*src_tensor, sizes, strides, offset_numel));
  return view_ref;
}

ValueRef ComputeGraph::add_tensorref(
const std::vector<int64_t>& sizes,
const vkapi::ScalarType dtype,
Expand Down
11 changes: 11 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,17 @@ class ComputeGraph final {
const ValueRef vref,
const utils::GPUMemoryLayout memory_layout);

/*
* Use the copy constructor of `api::vTensor` to create a "view" of the
* `vTensor` value at `vref`. See the copy constructor of `api::vTensor` for
* more details.
*/
ValueRef add_tensor_view(
const ValueRef vref,
const std::vector<int64_t>& sizes,
const std::vector<int64_t>& strides,
const size_t offset_numel = 0);

/*
* Add a `TensorRef` value to the graph with the specific properties. A
* `TensorRef` is a reference to a `api::vTensor` whose data is stored in an
Expand Down
22 changes: 18 additions & 4 deletions backends/vulkan/runtime/vk_api/memory/Allocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ Allocation::Allocation()
: memory_requirements{},
create_info{},
allocator(VK_NULL_HANDLE),
allocation(VK_NULL_HANDLE) {}
allocation(VK_NULL_HANDLE),
is_copy_(false) {}

Allocation::Allocation(
VmaAllocator vma_allocator,
Expand All @@ -38,16 +39,25 @@ Allocation::Allocation(
: memory_requirements(mem_props),
create_info(create_info),
allocator(vma_allocator),
allocation(VK_NULL_HANDLE) {
allocation(VK_NULL_HANDLE),
is_copy_(false) {
VK_CHECK(vmaAllocateMemory(
allocator, &memory_requirements, &create_info, &allocation, nullptr));
}

/*
 * Copy constructor: produces a non-owning alias of `other`'s VmaAllocation.
 * The original instance retains ownership of the underlying memory.
 */
Allocation::Allocation(const Allocation& other) noexcept
    : memory_requirements(other.memory_requirements),
      create_info(other.create_info),
      allocator(other.allocator),
      allocation(other.allocation),
      // Mark this instance as a copy so the destructor does not free the
      // shared VmaAllocation.
      is_copy_(true) {}

/*
 * Move constructor: takes over `other`'s allocation handle, preserving its
 * ownership status (is_copy_), and nulls out the source handle.
 */
Allocation::Allocation(Allocation&& other) noexcept
    : memory_requirements(other.memory_requirements),
      create_info(other.create_info),
      allocator(other.allocator),
      allocation(other.allocation),
      is_copy_(other.is_copy_) {
  // Null out the source handle so its destructor will not free the memory.
  other.allocation = VK_NULL_HANDLE;
}

Expand All @@ -58,14 +68,18 @@ Allocation& Allocation::operator=(Allocation&& other) noexcept {
create_info = other.create_info;
allocator = other.allocator;
allocation = other.allocation;
is_copy_ = other.is_copy_;

other.allocation = tmp_allocation;

return *this;
}

Allocation::~Allocation() {
  // Do not destroy the VmaAllocation if this class instance is a copy of some
  // other class instance, since this means that this class instance does not
  // have ownership of the underlying resource.
  if (VK_NULL_HANDLE != allocation && !is_copy_) {
    vmaFreeMemory(allocator, allocation);
  }
}
Expand Down
30 changes: 29 additions & 1 deletion backends/vulkan/runtime/vk_api/memory/Allocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,23 @@ struct Allocation final {
const VkMemoryRequirements&,
const VmaAllocationCreateInfo&);

Allocation(const Allocation&) = delete;
protected:
/*
* The copy constructor allows for the creation of class instances that are
* "aliases" of another class instance. The resulting class instance will not
* have ownership of the underlying VmaAllocation.
*
* This behaviour is analogous to creating a copy of a pointer, thus it is
* unsafe, as the original class instance may be destroyed before the copy.
* These constructors are therefore marked protected so that they may be used
* only in situations where the lifetime of the original class instance is
* guaranteed to exceed, or at least be the same as, the lifetime of the
* copied class instance.
*/
Allocation(const Allocation&) noexcept;

public:
// To discourage creating copies, the assignment operator is still deleted.
Allocation& operator=(const Allocation&) = delete;

Allocation(Allocation&&) noexcept;
Expand All @@ -47,9 +63,21 @@ struct Allocation final {
// Handles to the allocated memory
VmaAllocation allocation;

private:
// Indicates whether this class instance is a copy of another class instance,
// in which case it does not have ownership of the underlying VmaAllocation
bool is_copy_;

public:
  // True when this instance holds a live VmaAllocation handle, regardless of
  // whether the handle is owned or merely aliased (is_copy_).
  operator bool() const {
    return (allocation != VK_NULL_HANDLE);
  }

  // Returns whether this instance is a non-owning copy of another Allocation.
  inline bool is_copy() const {
    return is_copy_;
  }

friend class VulkanBuffer;
};

} // namespace vkapi
Expand Down
26 changes: 25 additions & 1 deletion backends/vulkan/runtime/vk_api/memory/Buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ VulkanBuffer::VulkanBuffer()
allocator_(VK_NULL_HANDLE),
memory_{},
owns_memory_(false),
is_copy_(false),
handle_(VK_NULL_HANDLE) {}

VulkanBuffer::VulkanBuffer(
Expand All @@ -37,6 +38,7 @@ VulkanBuffer::VulkanBuffer(
allocator_(vma_allocator),
memory_{},
owns_memory_(allocate_memory),
is_copy_(false),
handle_(VK_NULL_HANDLE) {
// If the buffer size is 0, allocate a buffer with a size of 1 byte. This is
// to ensure that there will be some resource that can be bound to a shader.
Expand Down Expand Up @@ -74,11 +76,29 @@ VulkanBuffer::VulkanBuffer(
}
}

/*
 * Copy constructor: creates a non-owning alias of `other`'s VkBuffer.
 * `offset` is added to the aliased buffer's memory offset, and `range` (when
 * not VK_WHOLE_SIZE) overrides the aliased buffer's memory range.
 */
VulkanBuffer::VulkanBuffer(
    const VulkanBuffer& other,
    const VkDeviceSize offset,
    const VkDeviceSize range) noexcept
    : buffer_properties_(other.buffer_properties_),
      allocator_(other.allocator_),
      // The Allocation copy constructor marks memory_ as a non-owning copy
      memory_(other.memory_),
      // owns_memory_ mirrors the original, but is_copy_ takes precedence in
      // the destructor, so a copy never frees the underlying resource
      owns_memory_(other.owns_memory_),
      is_copy_(true),
      handle_(other.handle_) {
  // TODO: set the offset and range appropriately
  buffer_properties_.mem_offset = other.buffer_properties_.mem_offset + offset;
  if (range != VK_WHOLE_SIZE) {
    buffer_properties_.mem_range = range;
  }
}

/*
 * Move constructor: takes over `other`'s buffer handle and memory, preserving
 * ownership and copy status, and nulls out the source handle.
 */
VulkanBuffer::VulkanBuffer(VulkanBuffer&& other) noexcept
    : buffer_properties_(other.buffer_properties_),
      allocator_(other.allocator_),
      memory_(std::move(other.memory_)),
      owns_memory_(other.owns_memory_),
      is_copy_(other.is_copy_),
      handle_(other.handle_) {
  // Null out the source handle so its destructor does not release the buffer.
  other.handle_ = VK_NULL_HANDLE;
}
Expand All @@ -91,6 +111,7 @@ VulkanBuffer& VulkanBuffer::operator=(VulkanBuffer&& other) noexcept {
allocator_ = other.allocator_;
memory_ = std::move(other.memory_);
owns_memory_ = other.owns_memory_;
is_copy_ = other.is_copy_;
handle_ = other.handle_;

other.handle_ = tmp_buffer;
Expand All @@ -100,7 +121,10 @@ VulkanBuffer& VulkanBuffer::operator=(VulkanBuffer&& other) noexcept {
}

VulkanBuffer::~VulkanBuffer() {
if (VK_NULL_HANDLE != handle_) {
// Do not destroy the VkBuffer if this class instance is a copy of another
// class instance, since this means that this class instance does not have
// ownership of the underlying resource.
if (VK_NULL_HANDLE != handle_ && !is_copy_) {
if (owns_memory_) {
vmaDestroyBuffer(allocator_, handle_, memory_.allocation);
} else {
Expand Down
Loading
Loading