Skip to content

Commit cc51e10

Browse files
SS-JIA and pytorchmergebot
authored and committed
[ET-VK] Enable Dynamic shape support via tensor virtual and physical resizing (pytorch#121598)
Summary: ## Context This changeset lays the foundations for supporting dynamic shapes in the ExecuTorch Vulkan delegate by allowing Tensors to be resized in one of two ways: 1. Discarding the underlying `vkImage` or `vkBuffer` and reallocating a new `vkImage` or `vkBuffer` with updated sizes. This method is intended to be used when the current `vkImage` or `vkBuffer` is not large enough to contain the new sizes. 2. Updating the tensor's size metadata without reallocating any new resources. This allows shaders to interpret the underlying `vkImage` or `vkBuffer` as if it were smaller than it actually is, and allows command buffers to be preserved when sizes are changed. Test Plan: Check CI. Tests have also been added to `vulkan_compute_api_test` that test the two methods of tensor resizing. Differential Revision: D54728401 Pull Request resolved: pytorch#121598 Approved by: https://github.com/jorgep31415
1 parent 2a99e6f commit cc51e10

File tree

7 files changed

+336
-23
lines changed

7 files changed

+336
-23
lines changed

aten/src/ATen/native/vulkan/api/Context.h

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ class Context final {
205205
class UniformParamsBuffer final {
206206
private:
207207
Context* context_p_;
208+
size_t nbytes_;
208209
VulkanBuffer vulkan_buffer_;
209210

210211
public:
@@ -213,6 +214,7 @@ class UniformParamsBuffer final {
213214
template <typename Block>
214215
UniformParamsBuffer(Context* context_p, const Block& block)
215216
: context_p_(context_p),
217+
nbytes_(sizeof(block)),
216218
vulkan_buffer_(
217219
context_p_->adapter_ptr()->vma().create_params_buffer(block)) {}
218220

@@ -231,13 +233,29 @@ class UniformParamsBuffer final {
231233
// Mutable access to the VulkanBuffer held by this UniformParamsBuffer.
inline VulkanBuffer& buffer() { return vulkan_buffer_; }
236+
237+
template <typename Block>
238+
void update(const Block& block) {
239+
if (sizeof(block) != nbytes_) {
240+
VK_THROW(
241+
"Attempted to update UniformParamsBuffer with data of different size");
242+
}
243+
// Fill the uniform buffer with data in block
244+
{
245+
MemoryMap mapping(vulkan_buffer_, MemoryAccessType::WRITE);
246+
Block* data_ptr = mapping.template data<Block>();
247+
248+
*data_ptr = block;
249+
}
250+
}
234251
};
235252

236253
class StorageBuffer final {
237254
private:
238255
Context* context_p_;
239256
ScalarType dtype_;
240257
size_t numel_;
258+
size_t nbytes_;
241259
VulkanBuffer vulkan_buffer_;
242260

243261
public:
@@ -249,8 +267,9 @@ class StorageBuffer final {
249267
: context_p_(context_p),
250268
dtype_(dtype),
251269
numel_(numel),
270+
nbytes_(element_size(dtype_) * numel_),
252271
vulkan_buffer_(context_p_->adapter_ptr()->vma().create_storage_buffer(
253-
element_size(dtype_) * numel_,
272+
nbytes_,
254273
gpuonly)) {}
255274

256275
StorageBuffer(const StorageBuffer&) = delete;
@@ -270,6 +289,14 @@ class StorageBuffer final {
270289
// Mutable access to the VulkanBuffer held by this StorageBuffer.
inline VulkanBuffer& buffer() { return vulkan_buffer_; }
292+
293+
inline size_t numel() {
294+
return numel_;
295+
}
296+
297+
inline size_t nbytes() {
298+
return nbytes_;
299+
}
273300
};
274301

275302
bool available();

aten/src/ATen/native/vulkan/api/Resource.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,10 @@ class VulkanBuffer final {
151151
return (memory_.allocation != VK_NULL_HANDLE);
152152
}
153153

154+
inline bool owns_memory() const {
155+
return owns_memory_;
156+
}
157+
154158
// A VulkanBuffer converts to true when its handle is not VK_NULL_HANDLE.
operator bool() const {
  return handle_ != VK_NULL_HANDLE;
}
@@ -372,6 +376,10 @@ class VulkanImage final {
372376
return (memory_.allocation != VK_NULL_HANDLE);
373377
}
374378

379+
inline bool owns_memory() const {
380+
return owns_memory_;
381+
}
382+
375383
// A VulkanImage converts to true when its image handle is not VK_NULL_HANDLE.
inline operator bool() const {
  return handles_.image != VK_NULL_HANDLE;
}

aten/src/ATen/native/vulkan/api/ShaderRegistry.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
// Looks up shader info in the global shader registry. The argument is a bare
// identifier: it is stringized with `#` before being passed to the registry.
#define VK_KERNEL(shader_name) \
  ::at::native::vulkan::api::shader_registry().get_shader_info(#shader_name)
1414

15+
// Looks up shader info in the global shader registry. Unlike VK_KERNEL, the
// argument is passed through unchanged (not stringized), so it can be a
// string expression evaluated at runtime.
#define VK_KERNEL_FROM_STR(shader_name_str) \
  ::at::native::vulkan::api::shader_registry().get_shader_info(shader_name_str)
17+
1518
namespace at {
1619
namespace native {
1720
namespace vulkan {

aten/src/ATen/native/vulkan/api/Tensor.cpp

Lines changed: 140 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -318,8 +318,8 @@ api::UniformParamsBuffer make_metadata_uniform(
318318
}
319319

320320
vTensor::BufferMetadata metadata{
321-
api::utils::make_nchw_uvec4(sizes),
322-
api::utils::make_nchw_uvec4(strides),
321+
api::utils::make_whcn_uvec4(sizes),
322+
api::utils::make_whcn_uvec4(strides),
323323
api::utils::safe_downcast<uint32_t>(sizes.size()),
324324
api::utils::safe_downcast<uint32_t>(api::utils::multiply_integers(sizes)),
325325
};
@@ -347,12 +347,13 @@ vTensor::vTensor(
347347
strides_{calc_strides(sizes, memory_layout_, storage_type)},
348348
gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
349349
gpu_strides_{calc_strides(gpu_sizes_, memory_layout_, storage_type)},
350-
// Vulkan uniform buffer containing sizes and stride info
351-
metadata_uniform_{make_metadata_uniform(
352-
context,
353-
gpu_sizes_,
354-
gpu_strides_,
355-
storage_type)},
350+
virtual_extents_(
351+
create_image_extents(gpu_sizes_, storage_type, memory_layout)),
352+
// Utility Uniform Buffers that can be passed to shaders as arguments
353+
metadata_uniform_(),
354+
cpu_sizes_uniform_(nullptr),
355+
gpu_sizes_uniform_(nullptr),
356+
extents_uniform_(nullptr),
356357
// Construct Tensor storage
357358
view_(std::make_shared<vTensorStorage>(
358359
context,
@@ -377,12 +378,13 @@ vTensor::vTensor(
377378
strides_{calc_strides(sizes, memory_layout_, storage_type)},
378379
gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
379380
gpu_strides_{calc_strides(gpu_sizes_, memory_layout_, storage_type)},
381+
virtual_extents_(
382+
create_image_extents(gpu_sizes_, storage_type, memory_layout)),
380383
// Vulkan uniform buffer containing sizes and stride info
381-
metadata_uniform_{make_metadata_uniform(
382-
context,
383-
gpu_sizes_,
384-
gpu_strides_,
385-
storage_type)},
384+
metadata_uniform_(),
385+
cpu_sizes_uniform_(nullptr),
386+
gpu_sizes_uniform_(nullptr),
387+
extents_uniform_(nullptr),
386388
// Quantization params
387389
is_quantized_{true},
388390
q_scale_{q_scale},
@@ -425,10 +427,47 @@ api::VulkanBuffer& vTensor::buffer(
425427
return view_->buffer_;
426428
}
427429

430+
// Returns the uniform buffer holding this tensor's buffer metadata, creating
// it from gpu_sizes_ / gpu_strides_ on first use.
api::VulkanBuffer& vTensor::buffer_metadata() {
  // Already created by an earlier call: hand it back as is.
  if (metadata_uniform_.buffer()) {
    return metadata_uniform_.buffer();
  }

  metadata_uniform_ = make_metadata_uniform(
      view_->context_, gpu_sizes_, gpu_strides_, storage_type());
  return metadata_uniform_.buffer();
}
437+
438+
std::shared_ptr<api::UniformParamsBuffer> vTensor::cpu_sizes_ubo() {
439+
if (!cpu_sizes_uniform_) {
440+
cpu_sizes_uniform_.reset(new api::UniformParamsBuffer(
441+
view_->context_, api::utils::make_whcn_ivec4(sizes_)));
442+
}
443+
return cpu_sizes_uniform_;
444+
}
445+
446+
std::shared_ptr<api::UniformParamsBuffer> vTensor::gpu_sizes_ubo() {
447+
if (!gpu_sizes_uniform_) {
448+
gpu_sizes_uniform_.reset(new api::UniformParamsBuffer(
449+
view_->context_, api::utils::make_whcn_ivec4(gpu_sizes_)));
450+
}
451+
return gpu_sizes_uniform_;
452+
}
453+
454+
std::shared_ptr<api::UniformParamsBuffer> vTensor::extents_ubo() {
455+
if (!extents_uniform_) {
456+
extents_uniform_.reset(new api::UniformParamsBuffer(
457+
view_->context_,
458+
api::utils::uvec4(
459+
{view_->extents_.data[0],
460+
view_->extents_.data[1],
461+
view_->extents_.data[2],
462+
1u})));
463+
}
464+
return extents_uniform_;
465+
}
466+
428467
vTensor::BufferMetadata vTensor::get_cpu_buffer_metadata() const {
429468
return {
430-
api::utils::make_nchw_uvec4(sizes_),
431-
api::utils::make_nchw_uvec4(strides_),
469+
api::utils::make_whcn_uvec4(sizes_),
470+
api::utils::make_whcn_uvec4(strides_),
432471
api::utils::safe_downcast<uint32_t>(sizes_.size()),
433472
api::utils::safe_downcast<uint32_t>(
434473
api::utils::multiply_integers(sizes_)),
@@ -473,6 +512,65 @@ void vTensor::bind_allocation(const api::MemoryAllocation& allocation) {
473512
}
474513
}
475514

515+
void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
516+
sizes_ = new_sizes;
517+
gpu_sizes_ = calc_gpu_sizes(sizes_, memory_layout_, storage_type());
518+
virtual_extents_ =
519+
create_image_extents(gpu_sizes_, storage_type(), memory_layout_);
520+
521+
if (cpu_sizes_uniform_) {
522+
cpu_sizes_uniform_->update(api::utils::make_whcn_ivec4(sizes_));
523+
}
524+
525+
if (gpu_sizes_uniform_) {
526+
gpu_sizes_uniform_->update(api::utils::make_whcn_ivec4(gpu_sizes_));
527+
}
528+
529+
if (extents_uniform_) {
530+
extents_uniform_->update(api::utils::uvec4(
531+
{virtual_extents_.data[0],
532+
virtual_extents_.data[1],
533+
virtual_extents_.data[2],
534+
1u}));
535+
}
536+
}
537+
538+
void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
539+
update_size_metadata(new_sizes);
540+
view_->discard_and_reallocate(
541+
calc_gpu_sizes(new_sizes, memory_layout_, storage_type()),
542+
memory_layout_,
543+
dtype_);
544+
}
545+
546+
void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
547+
update_size_metadata(new_sizes);
548+
if (storage_type() == api::StorageType::BUFFER) {
549+
if (gpu_nbytes() > view_->buffer_.mem_size()) {
550+
VK_THROW(
551+
"Cannot virtual_resize a vTensor with sizes that require a larger "
552+
"buffer! reallocate() should be used instead.");
553+
}
554+
} else {
555+
bool valid_resize = true;
556+
if (virtual_extents_.data[0] > view_->extents_.data[0]) {
557+
valid_resize = false;
558+
}
559+
if (virtual_extents_.data[1] > view_->extents_.data[1]) {
560+
valid_resize = false;
561+
}
562+
if (virtual_extents_.data[2] > view_->extents_.data[2]) {
563+
valid_resize = false;
564+
}
565+
566+
if (!valid_resize) {
567+
VK_THROW(
568+
"Cannot virtual_resize a vTensor with sizes that require a larger "
569+
"image texture! reallocate() should be used instead.");
570+
}
571+
}
572+
}
573+
476574
//
477575
// vTensorStorage
478576
//
@@ -569,11 +667,16 @@ vTensorStorage::vTensorStorage(
569667
last_access_{} {}
570668

571669
// Destruction releases the storage through flush(), which registers the held
// image or buffer with the context for cleanup.
vTensorStorage::~vTensorStorage() {
  this->flush();
}
672+
673+
void vTensorStorage::flush() {
572674
if (image_) {
573675
context_->register_image_cleanup(image_);
574676
} else if (buffer_) {
575677
context_->register_buffer_cleanup(buffer_);
576678
}
679+
last_access_ = {};
577680
}
578681

579682
void vTensorStorage::transition(
@@ -663,6 +766,28 @@ void add_buffer_barrier(
663766
}
664767
}
665768

769+
void vTensorStorage::discard_and_reallocate(
770+
const std::vector<int64_t>& gpu_sizes,
771+
const api::GPUMemoryLayout gpu_memory_layout,
772+
const api::ScalarType dtype) {
773+
const bool image_owns_memory = image_.owns_memory();
774+
const bool buffer_owns_memory = buffer_.owns_memory();
775+
776+
flush();
777+
778+
extents_ = create_image_extents(gpu_sizes, storage_type_, gpu_memory_layout);
779+
image_ = allocate_image(
780+
context_,
781+
extents_,
782+
storage_type_,
783+
api::to_vkformat(dtype),
784+
image_owns_memory);
785+
786+
buffer_length_ = api::utils::multiply_integers(gpu_sizes);
787+
buffer_ = allocate_buffer(
788+
context_, buffer_length_, storage_type_, dtype, buffer_owns_memory);
789+
}
790+
666791
} // namespace vulkan
667792
} // namespace native
668793
} // namespace at

0 commit comments

Comments
 (0)