Skip to content

Commit 4e039d0

Browse files
[ET-VK] Using a single GPU buffer for all tensor uniforms.
Pull Request resolved: #7015 This diff changes the Tensor class to store all uniforms in a single uniform buffer. Entities stored in uniforms, i.e., size, stride, numel and logical limits, are now stored in a single buffer and their offsets are stored as unsigned ints in the Tensor class. Other changes include: Adding a new ctor for the ParamsBuffer class to allow allocation with a size without a data ptr. Adding an offset input to the Buffer::data function. Adding an offset parameter to the BufferBindInfo ctor, so an additional offset can be supplied when binding a buffer. ghstack-source-id: 256728325 @exported-using-ghexport Differential Revision: [D65841750](https://our.internmc.facebook.com/intern/diff/D65841750/) Co-authored-by: Vivek Trivedi <[email protected]>
1 parent a9565aa commit 4e039d0

File tree

6 files changed

+119
-56
lines changed

6 files changed

+119
-56
lines changed

backends/vulkan/runtime/api/containers/ParamsBuffer.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ class ParamsBuffer final {
3333
vulkan_buffer_(
3434
context_p_->adapter_ptr()->vma().create_params_buffer(block)) {}
3535

36+
template <typename Block>
37+
ParamsBuffer(Context* context_p, const VkDeviceSize nbytes)
38+
: context_p_(context_p),
39+
nbytes_(nbytes),
40+
vulkan_buffer_(
41+
context_p_->adapter_ptr()->vma().create_uniform_buffer(nbytes)) {}
42+
3643
ParamsBuffer(const ParamsBuffer&);
3744
ParamsBuffer& operator=(const ParamsBuffer&);
3845

@@ -50,14 +57,11 @@ class ParamsBuffer final {
5057
}
5158

5259
template <typename Block>
53-
void update(const Block& block) {
54-
if (sizeof(block) != nbytes_) {
55-
VK_THROW("Attempted to update ParamsBuffer with data of different size");
56-
}
60+
void update(const Block& block, const uint32_t offset = 0) {
5761
// Fill the uniform buffer with data in block
5862
{
5963
vkapi::MemoryMap mapping(vulkan_buffer_, vkapi::kWrite);
60-
Block* data_ptr = mapping.template data<Block>();
64+
Block* data_ptr = mapping.template data<Block>(offset);
6165

6266
*data_ptr = block;
6367
}

backends/vulkan/runtime/api/containers/Tensor.cpp

Lines changed: 81 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -451,11 +451,13 @@ vTensor::vTensor(
451451
unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
452452
padded_numel_(utils::multiply_integers(padded_sizes_)),
453453
logical_limits_{{0, 0, 0}},
454+
uniforms_(),
454455
// Utility Uniform Buffers that can be passed to shaders as arguments
455-
sizes_uniform_(),
456-
strides_uniform_(),
457-
numel_uniform_(),
458-
logical_limits_uniform_(),
456+
uniforms_size_(0),
457+
sizes_uniform_offset_(kUniformOffsetUnset),
458+
unsqueezed_strides_offset_(kUniformOffsetUnset),
459+
numel_uniform_offset_(kUniformOffsetUnset),
460+
logical_limits_uniform_offset_(kUniformOffsetUnset),
459461
// Construct Tensor storage
460462
storage_(
461463
context,
@@ -497,11 +499,13 @@ vTensor::vTensor(
497499
unsqueezed_strides_(),
498500
padded_numel_(utils::multiply_integers(padded_sizes_)),
499501
logical_limits_(),
502+
uniforms_(),
500503
// Utility Uniform Buffers that can be passed to shaders as arguments
501-
sizes_uniform_(),
502-
strides_uniform_(),
503-
numel_uniform_(),
504-
logical_limits_uniform_(),
504+
uniforms_size_(0),
505+
sizes_uniform_offset_(kUniformOffsetUnset),
506+
unsqueezed_strides_offset_(kUniformOffsetUnset),
507+
numel_uniform_offset_(kUniformOffsetUnset),
508+
logical_limits_uniform_offset_(kUniformOffsetUnset),
505509
// Construct Tensor storage
506510
storage_(context, image) {
507511
set_logical_limits(storage_.image_extents_);
@@ -522,11 +526,13 @@ vTensor::vTensor(vTensor& other)
522526
other.unsqueezed_strides_.end()},
523527
padded_numel_(other.padded_numel_),
524528
logical_limits_{other.logical_limits_},
529+
uniforms_(),
525530
// Empty initialize Utility Uniform Buffers
526-
sizes_uniform_(),
527-
strides_uniform_(),
528-
numel_uniform_(),
529-
logical_limits_uniform_(),
531+
uniforms_size_(0),
532+
sizes_uniform_offset_(kUniformOffsetUnset),
533+
unsqueezed_strides_offset_(kUniformOffsetUnset),
534+
numel_uniform_offset_(kUniformOffsetUnset),
535+
logical_limits_uniform_offset_(kUniformOffsetUnset),
530536
// Copy Tensor storage
531537
storage_(other.storage_) {}
532538

@@ -547,11 +553,13 @@ vTensor::vTensor(
547553
unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
548554
padded_numel_(utils::multiply_integers(padded_sizes_)),
549555
logical_limits_(other.logical_limits_),
556+
uniforms_(),
550557
// Empty initialize Utility Uniform Buffers
551-
sizes_uniform_(),
552-
strides_uniform_(),
553-
numel_uniform_(),
554-
logical_limits_uniform_(),
558+
uniforms_size_(0),
559+
sizes_uniform_offset_(kUniformOffsetUnset),
560+
unsqueezed_strides_offset_(kUniformOffsetUnset),
561+
numel_uniform_offset_(kUniformOffsetUnset),
562+
logical_limits_uniform_offset_(kUniformOffsetUnset),
555563
// Copy Tensor storage
556564
storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) {
557565
VK_CHECK_COND(
@@ -612,33 +620,66 @@ utils::GPUMemoryLayout vTensor::estimate_memory_layout() const {
612620
}
613621

614622
const vkapi::BufferBindInfo vTensor::sizes_ubo() {
615-
if (!sizes_uniform_.buffer()) {
616-
sizes_uniform_ =
617-
ParamsBuffer(storage_.context_, utils::make_whcn_ivec4(sizes_));
623+
if (!uniforms_.buffer()) {
624+
uniforms_ = ParamsBuffer(storage_.context_, kMaxUniformBufferSize);
618625
}
619-
return vkapi::BufferBindInfo(sizes_uniform_.buffer());
626+
if (sizes_uniform_offset_ == kUniformOffsetUnset) {
627+
VK_CHECK_COND(
628+
(uniforms_size_ + kSizePerUniform) <= kMaxUniformBufferSize,
629+
"Uniform data allocation has exceeded Tensor uniform buffer size");
630+
sizes_uniform_offset_ = uniforms_size_;
631+
uniforms_size_ += kSizePerUniform;
632+
uniforms_.update(utils::make_whcn_ivec4(sizes_), sizes_uniform_offset_);
633+
}
634+
return vkapi::BufferBindInfo(uniforms_.buffer(), sizes_uniform_offset_);
620635
}
621636

622637
const vkapi::BufferBindInfo vTensor::strides_ubo() {
623-
if (!strides_uniform_.buffer()) {
624-
strides_uniform_ = ParamsBuffer(
625-
storage_.context_, utils::make_whcn_ivec4(unsqueezed_strides_));
638+
if (!uniforms_.buffer()) {
639+
uniforms_ = ParamsBuffer(storage_.context_, kMaxUniformBufferSize);
640+
}
641+
if (unsqueezed_strides_offset_ == kUniformOffsetUnset) {
642+
VK_CHECK_COND(
643+
(uniforms_size_ + kSizePerUniform) <= kMaxUniformBufferSize,
644+
"Uniform data allocation has exceeded Tensor uniform buffer size");
645+
unsqueezed_strides_offset_ = uniforms_size_;
646+
uniforms_size_ += kSizePerUniform;
647+
uniforms_.update(
648+
utils::make_whcn_ivec4(unsqueezed_strides_),
649+
unsqueezed_strides_offset_);
626650
}
627-
return vkapi::BufferBindInfo(strides_uniform_.buffer());
651+
return vkapi::BufferBindInfo(uniforms_.buffer(), unsqueezed_strides_offset_);
628652
}
629653

630654
const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
631-
if (!logical_limits_uniform_.buffer()) {
632-
logical_limits_uniform_ = ParamsBuffer(storage_.context_, logical_limits_);
655+
if (!uniforms_.buffer()) {
656+
uniforms_ = ParamsBuffer(storage_.context_, kMaxUniformBufferSize);
633657
}
634-
return vkapi::BufferBindInfo(logical_limits_uniform_.buffer());
658+
if (logical_limits_uniform_offset_ == kUniformOffsetUnset) {
659+
VK_CHECK_COND(
660+
(uniforms_size_ + kSizePerUniform) <= kMaxUniformBufferSize,
661+
"Uniform data allocation has exceeded Tensor uniform buffer size");
662+
logical_limits_uniform_offset_ = uniforms_size_;
663+
uniforms_size_ += kSizePerUniform;
664+
uniforms_.update(logical_limits_, logical_limits_uniform_offset_);
665+
}
666+
return vkapi::BufferBindInfo(
667+
uniforms_.buffer(), logical_limits_uniform_offset_);
635668
}
636669

637670
const vkapi::BufferBindInfo vTensor::numel_ubo() {
638-
if (!numel_uniform_.buffer()) {
639-
numel_uniform_ = ParamsBuffer(storage_.context_, numel_);
671+
if (!uniforms_.buffer()) {
672+
uniforms_ = ParamsBuffer(storage_.context_, kMaxUniformBufferSize);
673+
}
674+
if (numel_uniform_offset_ == kUniformOffsetUnset) {
675+
VK_CHECK_COND(
676+
(uniforms_size_ + kSizePerUniform) <= kMaxUniformBufferSize,
677+
"Uniform data allocation has exceeded Tensor uniform buffer size");
678+
numel_uniform_offset_ = uniforms_size_;
679+
uniforms_size_ += kSizePerUniform;
680+
uniforms_.update(numel_, numel_uniform_offset_);
640681
}
641-
return vkapi::BufferBindInfo(numel_uniform_.buffer());
682+
return vkapi::BufferBindInfo(uniforms_.buffer(), numel_uniform_offset_);
642683
}
643684

644685
size_t vTensor::staging_buffer_numel() const {
@@ -690,17 +731,19 @@ void vTensor::update_metadata() {
690731
set_logical_limits(
691732
calculate_image_extents(padded_sizes_, axis_map_, packed_dim_));
692733

693-
if (sizes_uniform_.buffer()) {
694-
sizes_uniform_.update(utils::make_whcn_ivec4(sizes_));
734+
if (sizes_uniform_offset_ != kUniformOffsetUnset) {
735+
uniforms_.update(utils::make_whcn_ivec4(sizes_), sizes_uniform_offset_);
695736
}
696-
if (strides_uniform_.buffer()) {
697-
strides_uniform_.update(utils::make_whcn_ivec4(unsqueezed_strides_));
737+
if (unsqueezed_strides_offset_ != kUniformOffsetUnset) {
738+
uniforms_.update(
739+
utils::make_whcn_ivec4(unsqueezed_strides_),
740+
unsqueezed_strides_offset_);
698741
}
699-
if (numel_uniform_.buffer()) {
700-
numel_uniform_.update(numel_);
742+
if (numel_uniform_offset_ != kUniformOffsetUnset) {
743+
uniforms_.update(numel_, numel_uniform_offset_);
701744
}
702-
if (logical_limits_uniform_.buffer()) {
703-
logical_limits_uniform_.update(logical_limits_);
745+
if (logical_limits_uniform_offset_ != kUniformOffsetUnset) {
746+
uniforms_.update(logical_limits_, logical_limits_uniform_offset_);
704747
}
705748
}
706749

backends/vulkan/runtime/api/containers/Tensor.h

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -297,18 +297,32 @@ class vTensor final {
297297
TextureLimits logical_limits_;
298298

299299
/*
300-
* Utility GPU buffers that can be passed to shaders in order to convey tensor
301-
* metadata. These buffers will be initialized the first time they are
302-
* accessed via the corresponding *_ubo() function, and their contents will be
303-
* updated whenever virtual_resize() is called.
300+
* Utility GPU buffer that can be passed to shaders in order to convey tensor
301+
* metadata. Uniform buffer will be initialized only the first time a ubo is
302+
* requested. Buffer offsets will be initialized the first time they are
303+
* accessed via the corresponding *_ubo() function. Uniform buffer's contents
304+
* will be updated whenever virtual_resize() is called.
304305
*
305306
* Refer to the comments for the corresponding *_ubo() functions for more
306307
* context about the data contained in each buffer.
307308
*/
308-
ParamsBuffer sizes_uniform_;
309-
ParamsBuffer strides_uniform_;
310-
ParamsBuffer numel_uniform_;
311-
ParamsBuffer logical_limits_uniform_;
309+
ParamsBuffer uniforms_;
310+
uint32_t uniforms_size_;
311+
uint32_t sizes_uniform_offset_;
312+
uint32_t unsqueezed_strides_offset_;
313+
uint32_t numel_uniform_offset_;
314+
uint32_t logical_limits_uniform_offset_;
315+
316+
// Size allocated for each uniform
317+
// each uniform is assumed to be a vec of 4 ints to maintain 16 byte alignemnt
318+
constexpr static size_t kSizePerUniform = sizeof(utils::ivec4);
319+
// Total size of tensor's uniform buffer
320+
constexpr static size_t kMaxUniformBufferSize =
321+
4 * // we have 4 uniforms that are passed on to shaders
322+
kSizePerUniform;
323+
324+
// Initial value of uniform buffer offsets
325+
constexpr static uint32_t kUniformOffsetUnset = kMaxUniformBufferSize;
312326

313327
vTensorStorage storage_;
314328

backends/vulkan/runtime/vk_api/Descriptor.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@ namespace vkapi {
2323
BufferBindInfo::BufferBindInfo()
2424
: handle(VK_NULL_HANDLE), offset(0u), range(0u) {}
2525

26-
BufferBindInfo::BufferBindInfo(const VulkanBuffer& buffer_p)
26+
BufferBindInfo::BufferBindInfo(
27+
const VulkanBuffer& buffer_p,
28+
const uint32_t offset_p)
2729
: handle(buffer_p.handle()),
28-
offset(buffer_p.mem_offset()),
30+
offset(buffer_p.mem_offset() + offset_p),
2931
range(buffer_p.mem_range()) {}
3032

3133
//

backends/vulkan/runtime/vk_api/Descriptor.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ struct BufferBindInfo final {
3333
VkDeviceSize range;
3434

3535
BufferBindInfo();
36-
BufferBindInfo(const VulkanBuffer& buffer_p);
36+
BufferBindInfo(const VulkanBuffer& buffer_p, const uint32_t offset_p = 0u);
3737
};
3838

3939
struct ParamsBindList final {

backends/vulkan/runtime/vk_api/memory/Buffer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,8 +195,8 @@ class MemoryMap final {
195195

196196
public:
197197
template <typename T>
198-
T* data() {
199-
return reinterpret_cast<T*>(data_);
198+
T* data(const uint32_t offset = 0) {
199+
return reinterpret_cast<T*>(static_cast<uint8_t*>(data_) + offset);
200200
}
201201

202202
inline size_t nbytes() {

0 commit comments

Comments (0)