Skip to content

Commit 07c77be

Browse files
SS-JIA authored and facebook-github-bot committed
Remove unnecessary member structs from Allocation struct to reduce vTensor size (#5392)
Summary: Pull Request resolved: #5392

## Context

Recently, the size of `vTensor` has grown dramatically because multiple metadata UBO member variables were added to store tensor metadata. The `vTensor` object was over 2 KB on 64-bit machines.

One large contributor to the size of `vTensor` was the `Allocation` class, which stored several structs (`VkMemoryRequirements`, `VmaAllocationCreateInfo`, and `VmaAllocationInfo`) as class members. Because these structs were kept in memory, each instance of `Allocation` was over 200 B.

These structs do not need to be kept in memory, so they have been removed. With them removed, the size of `vTensor` is roughly halved, to about 1 KB.

The size of `vTensor` can be reduced further by sharing metadata UBO member variables and by being more conservative about keeping tensor metadata in memory.

ghstack-source-id: 242810657
exported-using-ghexport

Reviewed By: jorgep31415

Differential Revision: D62660980

fbshipit-source-id: ac4ae82ba7a560c162a4f3aea488647af434a332
1 parent a166a25 commit 07c77be

File tree

14 files changed

+45
-99
lines changed

14 files changed

+45
-99
lines changed

backends/vulkan/runtime/api/containers/StagingBuffer.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ class StagingBuffer final {
2727
size_t nbytes_;
2828
vkapi::VulkanBuffer vulkan_buffer_;
2929

30+
void* mapped_data_;
31+
3032
public:
3133
StagingBuffer(
3234
Context* context_p,
@@ -37,7 +39,8 @@ class StagingBuffer final {
3739
numel_(numel),
3840
nbytes_(element_size(dtype_) * numel_),
3941
vulkan_buffer_(
40-
context_p_->adapter_ptr()->vma().create_staging_buffer(nbytes_)) {}
42+
context_p_->adapter_ptr()->vma().create_staging_buffer(nbytes_)),
43+
mapped_data_(nullptr) {}
4144

4245
StagingBuffer(const StagingBuffer&) = delete;
4346
StagingBuffer& operator=(const StagingBuffer&) = delete;
@@ -58,7 +61,10 @@ class StagingBuffer final {
5861
}
5962

6063
inline void* data() {
61-
return vulkan_buffer_.allocation_info().pMappedData;
64+
if (!mapped_data_) {
65+
mapped_data_ = vulkan_buffer_.allocation_info().pMappedData;
66+
}
67+
return mapped_data_;
6268
}
6369

6470
inline size_t numel() {

backends/vulkan/runtime/api/containers/Tensor.cpp

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -436,17 +436,6 @@ size_t vTensor::staging_buffer_numel() const {
436436
return padded_numel_;
437437
}
438438

439-
VmaAllocationCreateInfo vTensor::get_allocation_create_info() const {
440-
switch (storage_type()) {
441-
case utils::kBuffer:
442-
return storage_.buffer_.allocation_create_info();
443-
case utils::kTexture2D:
444-
case utils::kTexture3D:
445-
return storage_.image_.allocation_create_info();
446-
}
447-
return {};
448-
}
449-
450439
VkMemoryRequirements vTensor::get_memory_requirements() const {
451440
switch (storage_type()) {
452441
case utils::kBuffer:

backends/vulkan/runtime/graph/containers/SharedObject.cpp

Lines changed: 5 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,47 +15,27 @@ namespace vkcompute {
1515
void SharedObject::add_user(ComputeGraph* const graph, const ValueRef idx) {
1616
vTensorPtr t = graph->get_tensor(idx);
1717

18-
//
1918
// Aggregate Memory Requirements
20-
//
21-
2219
const VkMemoryRequirements mem_reqs = t->get_memory_requirements();
2320
aggregate_memory_requirements.size =
2421
std::max(mem_reqs.size, aggregate_memory_requirements.size);
2522
aggregate_memory_requirements.alignment =
2623
std::max(mem_reqs.alignment, aggregate_memory_requirements.alignment);
2724
aggregate_memory_requirements.memoryTypeBits |= mem_reqs.memoryTypeBits;
2825

29-
//
30-
// Aggregate Allocation Create Info
31-
//
32-
33-
const VmaAllocationCreateInfo create_info = t->get_allocation_create_info();
34-
// Clear out CREATE_STRATEGY bit flags in case of conflict
35-
VmaAllocationCreateFlags clear_mask = ~VMA_ALLOCATION_CREATE_STRATEGY_MASK;
36-
VmaAllocationCreateFlags create_flags = create_info.flags & clear_mask;
37-
// Use the default allocation strategy
38-
aggregate_create_info.flags =
39-
create_flags | vkapi::DEFAULT_ALLOCATION_STRATEGY;
40-
41-
// Set the usage flag if it is currently not set
42-
if (aggregate_create_info.usage == VMA_MEMORY_USAGE_UNKNOWN) {
43-
aggregate_create_info.usage = create_info.usage;
44-
}
45-
// Otherwise check that there is no conflict regarding usage
46-
VK_CHECK_COND(aggregate_create_info.usage == create_info.usage);
47-
aggregate_create_info.requiredFlags |= create_info.requiredFlags;
48-
aggregate_create_info.preferredFlags |= create_info.preferredFlags;
49-
5026
users.emplace_back(idx);
5127
}
5228

5329
void SharedObject::allocate(ComputeGraph* const graph) {
5430
if (aggregate_memory_requirements.size == 0) {
5531
return;
5632
}
33+
34+
VmaAllocationCreateInfo alloc_create_info =
35+
graph->context()->adapter_ptr()->vma().gpuonly_resource_create_info();
36+
5737
allocation = graph->context()->adapter_ptr()->vma().create_allocation(
58-
aggregate_memory_requirements, aggregate_create_info);
38+
aggregate_memory_requirements, alloc_create_info);
5939
}
6040

6141
void SharedObject::bind_users(ComputeGraph* const graph) {

backends/vulkan/runtime/graph/containers/SharedObject.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ struct SharedObject {
2828
explicit SharedObject() = default;
2929

3030
VkMemoryRequirements aggregate_memory_requirements;
31-
VmaAllocationCreateInfo aggregate_create_info;
3231
std::vector<ValueRef> users;
3332
vkapi::Allocation allocation;
3433

backends/vulkan/runtime/vk_api/memory/Allocation.cpp

Lines changed: 5 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -26,58 +26,37 @@ namespace vkcompute {
2626
namespace vkapi {
2727

2828
Allocation::Allocation()
29-
: memory_requirements{},
30-
create_info{},
31-
allocator(VK_NULL_HANDLE),
32-
allocation(VK_NULL_HANDLE),
33-
allocation_info({}),
34-
is_copy_(false) {}
29+
: allocator(VK_NULL_HANDLE), allocation(VK_NULL_HANDLE), is_copy_(false) {}
3530

3631
Allocation::Allocation(
3732
VmaAllocator vma_allocator,
3833
const VkMemoryRequirements& mem_props,
3934
const VmaAllocationCreateInfo& create_info)
40-
: memory_requirements(mem_props),
41-
create_info(create_info),
42-
allocator(vma_allocator),
43-
allocation(VK_NULL_HANDLE),
44-
allocation_info({}),
45-
is_copy_(false) {
35+
: allocator(vma_allocator), allocation(VK_NULL_HANDLE), is_copy_(false) {
4636
VK_CHECK(vmaAllocateMemory(
47-
allocator, &memory_requirements, &create_info, &allocation, nullptr));
37+
allocator, &mem_props, &create_info, &allocation, nullptr));
4838
}
4939

5040
Allocation::Allocation(const Allocation& other) noexcept
51-
: memory_requirements(other.memory_requirements),
52-
create_info(other.create_info),
53-
allocator(other.allocator),
41+
: allocator(other.allocator),
5442
allocation(other.allocation),
55-
allocation_info(other.allocation_info),
5643
is_copy_(true) {}
5744

5845
Allocation::Allocation(Allocation&& other) noexcept
59-
: memory_requirements(other.memory_requirements),
60-
create_info(other.create_info),
61-
allocator(other.allocator),
46+
: allocator(other.allocator),
6247
allocation(other.allocation),
63-
allocation_info(other.allocation_info),
6448
is_copy_(other.is_copy_) {
6549
other.allocation = VK_NULL_HANDLE;
66-
other.allocation_info = {};
6750
}
6851

6952
Allocation& Allocation::operator=(Allocation&& other) noexcept {
7053
VmaAllocation tmp_allocation = allocation;
7154

72-
memory_requirements = other.memory_requirements;
73-
create_info = other.create_info;
7455
allocator = other.allocator;
7556
allocation = other.allocation;
76-
allocation_info = other.allocation_info;
7757
is_copy_ = other.is_copy_;
7858

7959
other.allocation = tmp_allocation;
80-
other.allocation_info = {};
8160

8261
return *this;
8362
}

backends/vulkan/runtime/vk_api/memory/Allocation.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,10 @@ struct Allocation final {
5555

5656
~Allocation();
5757

58-
VkMemoryRequirements memory_requirements;
59-
// The properties this allocation was created with
60-
VmaAllocationCreateInfo create_info;
6158
// The allocator object this was allocated from
6259
VmaAllocator allocator;
6360
// Handles to the allocated memory
6461
VmaAllocation allocation;
65-
// Information about the allocated memory
66-
VmaAllocationInfo allocation_info;
6762

6863
private:
6964
// Indicates whether this class instance is a copy of another class instance,

backends/vulkan/runtime/vk_api/memory/Allocator.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@ Allocator::~Allocator() {
5858
vmaDestroyAllocator(allocator_);
5959
}
6060

61+
VmaAllocationCreateInfo Allocator::gpuonly_resource_create_info() {
62+
VmaAllocationCreateInfo alloc_create_info = {};
63+
alloc_create_info.flags = DEFAULT_ALLOCATION_STRATEGY;
64+
alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
65+
return alloc_create_info;
66+
}
67+
6168
Allocation Allocator::create_allocation(
6269
const VkMemoryRequirements& memory_requirements,
6370
const VmaAllocationCreateInfo& create_info) {
@@ -103,9 +110,7 @@ VulkanImage Allocator::create_image(
103110
(VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
104111
}
105112

106-
VmaAllocationCreateInfo alloc_create_info = {};
107-
alloc_create_info.flags = DEFAULT_ALLOCATION_STRATEGY;
108-
alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
113+
VmaAllocationCreateInfo alloc_create_info = gpuonly_resource_create_info();
109114

110115
const VulkanImage::ImageProperties image_props{
111116
image_type,
@@ -157,10 +162,7 @@ VulkanBuffer Allocator::create_storage_buffer(
157162
const bool allocate_memory) {
158163
const VkBufferUsageFlags buffer_usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
159164

160-
VmaAllocationCreateInfo alloc_create_info = {};
161-
alloc_create_info.flags = DEFAULT_ALLOCATION_STRATEGY;
162-
alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
163-
165+
VmaAllocationCreateInfo alloc_create_info = gpuonly_resource_create_info();
164166
return VulkanBuffer(
165167
allocator_, size, alloc_create_info, buffer_usage, allocate_memory);
166168
}

backends/vulkan/runtime/vk_api/memory/Allocator.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ class Allocator final {
4848
VmaAllocator allocator_;
4949

5050
public:
51+
VmaAllocationCreateInfo gpuonly_resource_create_info();
52+
5153
Allocation create_allocation(
5254
const VkMemoryRequirements& memory_requirements,
5355
const VmaAllocationCreateInfo& create_info);

backends/vulkan/runtime/vk_api/memory/Buffer.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,14 @@ VulkanBuffer::VulkanBuffer(
5858
nullptr, // pQueueFamilyIndices
5959
};
6060

61-
memory_.create_info = allocation_create_info;
62-
6361
if (allocate_memory) {
6462
VK_CHECK(vmaCreateBuffer(
6563
allocator_,
6664
&buffer_create_info,
6765
&allocation_create_info,
6866
&handle_,
6967
&(memory_.allocation),
70-
&(memory_.allocation_info)));
68+
nullptr));
7169
} else {
7270
VmaAllocatorInfo allocator_info{};
7371
vmaGetAllocatorInfo(allocator_, &allocator_info);
@@ -137,6 +135,12 @@ VulkanBuffer::~VulkanBuffer() {
137135
}
138136
}
139137

138+
VmaAllocationInfo VulkanBuffer::allocation_info() const {
139+
VmaAllocationInfo info;
140+
vmaGetAllocationInfo(allocator_, memory_.allocation, &info);
141+
return info;
142+
}
143+
140144
VkMemoryRequirements VulkanBuffer::get_memory_requirements() const {
141145
VkMemoryRequirements memory_requirements;
142146
vkGetBufferMemoryRequirements(this->device(), handle_, &memory_requirements);

backends/vulkan/runtime/vk_api/memory/Buffer.h

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -114,13 +114,7 @@ class VulkanBuffer final {
114114
return memory_.allocation;
115115
}
116116

117-
inline VmaAllocationInfo allocation_info() const {
118-
return memory_.allocation_info;
119-
}
120-
121-
inline VmaAllocationCreateInfo allocation_create_info() const {
122-
return VmaAllocationCreateInfo(memory_.create_info);
123-
}
117+
VmaAllocationInfo allocation_info() const;
124118

125119
inline VkBuffer handle() const {
126120
return handle_;

backends/vulkan/runtime/vk_api/memory/Image.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,6 @@ VulkanImage::VulkanImage(
159159
layout_, // initialLayout
160160
};
161161

162-
memory_.create_info = allocation_create_info;
163-
164162
if (allocate_memory) {
165163
VK_CHECK(vmaCreateImage(
166164
allocator_,

backends/vulkan/runtime/vk_api/memory/Image.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -169,10 +169,6 @@ class VulkanImage final {
169169
return memory_.allocation;
170170
}
171171

172-
inline VmaAllocationCreateInfo allocation_create_info() const {
173-
return VmaAllocationCreateInfo(memory_.create_info);
174-
}
175-
176172
inline VkFormat format() const {
177173
return image_properties_.image_format;
178174
}

backends/vulkan/test/utils/test_utils.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -495,8 +495,10 @@ void submit_to_gpu() {
495495
}
496496

497497
vkapi::Allocation allocate_memory_for(const api::vTensor& vten) {
498+
VmaAllocationCreateInfo alloc_create_info =
499+
api::context()->adapter_ptr()->vma().gpuonly_resource_create_info();
498500
return api::context()->adapter_ptr()->vma().create_allocation(
499-
vten.get_memory_requirements(), vten.get_allocation_create_info());
501+
vten.get_memory_requirements(), alloc_create_info);
500502
}
501503

502504
VmaTotalStatistics get_vma_stats() {

backends/vulkan/test/vulkan_compute_api_test.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,11 +1066,11 @@ TEST_F(VulkanComputeAPITest, print_object_sizes) {
10661066
// can alert ourselves to any significant changes in the sizes of these
10671067
// objects by checking the `sizeof()` the class against some loose thresholds.
10681068

1069-
// Current known size on 64 bit system: 2064 B
1070-
EXPECT_TRUE(sizeof(vTensor) < 2200);
1071-
// Current known size on 64 bit system: 2080 B
1072-
EXPECT_TRUE(sizeof(Value) < 2200);
1073-
// Current known size on 64 bit system: 240 B
1069+
// Current known size on 64 bit system: 1040 B
1070+
EXPECT_TRUE(sizeof(vTensor) < 1200);
1071+
// Current known size on 64 bit system: 1056 B
1072+
EXPECT_TRUE(sizeof(Value) < 1200);
1073+
// Current known size on 64 bit system: 120 B
10741074
EXPECT_TRUE(sizeof(StagingBuffer) < 500);
10751075
// Current known size on 64 bit system: 384 B
10761076
EXPECT_TRUE(sizeof(ComputeGraph) < 500);

0 commit comments

Comments
 (0)