Commit bf5093a

SS-JIA authored and facebook-github-bot committed
Clean up api::vTensor class (#3149)
Summary:
Pull Request resolved: #3149

## Context

Now that we have forked the `api/` directory from PyTorch Vulkan, we can clean up the `vTensor` class and remove functionality that is not necessary for the ExecuTorch Vulkan delegate. The following changes are made:

* Remove unused member variables and member functions from `vTensor` and `vTensorStorage`
* Remove all quantization-related member variables and member functions, as well as the `vTensor` constructor for quantized tensors. The Quantization API will be reworked from the ground up.
* Rename `view_` (an instance of `vTensorStorage`) to `storage_`

Finally, the critical change introduced is that `storage_` is now stored as a direct `vTensorStorage` member variable of `vTensor` instead of as a `std::shared_ptr<vTensorStorage>`. For context, `storage_` was originally held behind a shared pointer to stay compliant with ATen Tensors, which must support copy construction so that the following works:

```
at::Tensor b = at::rand(...);
// Oftentimes this creates a "view" of the tensor: a and b point to the same
// underlying storage, but with different metadata.
at::Tensor a = b;
```

However, in the ExecuTorch delegate this is no longer necessary. Each tensor is associated with its own independent storage and is responsible for managing its own memory. **By getting rid of the `std::shared_ptr`, we can avoid a heap allocation and avoid chasing pointers whenever we need to access the resources of a `vTensor`.**

ghstack-source-id: 223225901
exported-using-ghexport

Reviewed By: jorgep31415

Differential Revision: D55811279

fbshipit-source-id: 95c0ecc9658ef9bc64ecee9e5c9e272da12786b8
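To make the trade-off concrete, here is a minimal, self-contained sketch (hypothetical class names, not the actual `vTensor`/`vTensorStorage` implementation) contrasting storage held behind a `std::shared_ptr` with storage held as a direct member. The shared-pointer version pays for a separate heap allocation at construction and a pointer indirection on every access; the direct-member version keeps the storage inline in the tensor object, at the cost of no longer being copy-shareable.

```cpp
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>

// Hypothetical stand-in for api::vTensorStorage, reduced to a single field.
struct Storage {
  std::vector<int64_t> gpu_sizes;
};

// Before: storage behind a shared_ptr. Construction performs an extra heap
// allocation, and every accessor dereferences a pointer to reach the storage.
class TensorSharedPtrStorage {
 public:
  explicit TensorSharedPtrStorage(std::vector<int64_t> sizes)
      : view_(std::make_shared<Storage>(Storage{std::move(sizes)})) {}

  const std::vector<int64_t>& gpu_sizes() const {
    return view_->gpu_sizes;  // pointer chase on every access
  }

 private:
  std::shared_ptr<Storage> view_;
};

// After: storage as a direct member. No separate allocation and no
// indirection; the tensor exclusively owns and manages its storage.
class TensorInlineStorage {
 public:
  explicit TensorInlineStorage(std::vector<int64_t> sizes)
      : storage_{std::move(sizes)} {}

  const std::vector<int64_t>& gpu_sizes() const {
    return storage_.gpu_sizes;  // direct member access
  }

 private:
  Storage storage_;
};
```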
1 parent 825db6c commit bf5093a

File tree

7 files changed: +65 -359 lines changed


backends/vulkan/runtime/api/Tensor.cpp

Lines changed: 32 additions & 200 deletions
```diff
@@ -13,80 +13,6 @@ namespace vkcompute {
 
 namespace {
 
-/*
- * Calculates the strides of a contiguous tensor. empty_tensor_restride from
- * TensorImpl.h was used as a reference.
- */
-std::vector<int64_t> calc_contiguous_strides(
-    const std::vector<int64_t>& sizes) {
-  int64_t ndim = static_cast<int64_t>(sizes.size());
-  std::vector<int64_t> strides(ndim);
-
-  int64_t running_product = 1;
-  if (ndim >= 1) {
-    strides.at(ndim - 1) = running_product;
-    for (int i = static_cast<int>(sizes.size()) - 2; i >= 0; --i) {
-      running_product *= sizes.at(i + 1);
-      strides.at(i) = running_product;
-    }
-  }
-
-  return strides;
-}
-
-std::vector<int64_t> calc_channels_last_strides(
-    const std::vector<int64_t>& sizes) {
-  std::vector<int64_t> strides(sizes.size());
-
-  switch (sizes.size()) {
-    case 4:
-      strides.at(1) = 1;
-      strides.at(3) = sizes.at(1);
-      strides.at(2) = strides.at(3) * sizes.at(3);
-      strides.at(0) = strides.at(2) * sizes.at(2);
-      return strides;
-    case 3:
-      strides.at(0) = 1;
-      strides.at(2) = sizes.at(0);
-      strides.at(1) = strides.at(2) * sizes.at(2);
-      return strides;
-    default:
-      VK_THROW("ChannelsLast format only available for 3 <= ndim <= 4!");
-  }
-
-  return strides;
-}
-
-/*
- * Calculates the strides of a tensor based on the sizes and memory format. Note
- * that strides are only valid for vTensors that are backed by buffer storage;
- * if texture storage is used then the strides are invalid and set to zeros.
- */
-std::vector<int64_t> calc_strides(
-    const std::vector<int64_t>& sizes,
-    const api::GPUMemoryLayout memory_layout,
-    const api::StorageType storage_type) {
-  switch (storage_type) {
-    case api::kBuffer:
-      switch (memory_layout) {
-        case api::kWidthPacked:
-          return calc_contiguous_strides(sizes);
-          break;
-        case api::kChannelsPacked:
-          return calc_channels_last_strides(sizes);
-          break;
-        default:
-          VK_THROW("Invalid memory format used to create vTensor!");
-      }
-      break;
-    case api::kTexture3D:
-    case api::kTexture2D:
-      return std::vector<int64_t>(sizes.size());
-    default:
-      VK_THROW("Invalid storage type used to create vTensor!");
-  }
-}
-
 /*
  * When stored on the GPU, one dimension will be aligned to the next multiple of
  * 4 in order to take advantage of vec4 data types. The dimension that is
@@ -176,11 +102,11 @@ api::utils::uvec3 create_image_extents(
 
   switch (memory_layout) {
     case api::kWidthPacked:
-      VK_CHECK_COND(width % 4 == 0, "Channels must be divisible by 4!");
+      VK_CHECK_COND(width % 4 == 0, "Width must be divisible by 4!");
      width /= 4;
      break;
    case api::kHeightPacked:
-      VK_CHECK_COND(height % 4 == 0, "Channels must be divisible by 4!");
+      VK_CHECK_COND(height % 4 == 0, "Height must be divisible by 4!");
      height /= 4;
      break;
    case api::kChannelsPacked:
@@ -212,23 +138,19 @@ vTensor::vTensor(
       memory_layout_(memory_layout),
       // Calculate sizes and strides
       sizes_(sizes.begin(), sizes.end()),
-      strides_{calc_strides(sizes, memory_layout_, storage_type)},
       gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
-      gpu_strides_{calc_strides(gpu_sizes_, memory_layout_, storage_type)},
-      virtual_extents_(
-          create_image_extents(gpu_sizes_, storage_type, memory_layout)),
       // Utility Uniform Buffers that can be passed to shaders as arguments
       cpu_sizes_uniform_(nullptr),
       gpu_sizes_uniform_(nullptr),
       extents_uniform_(nullptr),
       // Construct Tensor storage
-      view_(std::make_shared<vTensorStorage>(
+      storage_(
          context,
          storage_type,
          memory_layout_,
          gpu_sizes_,
          dtype_,
-          allocate_memory)) {
+          allocate_memory) {
   if (dtype == api::kHalf) {
     VK_CHECK_COND(
         api::context()->adapter_ptr()->has_16bit_storage(),
@@ -237,93 +159,60 @@ vTensor::vTensor(
   }
 }
 
-vTensor::vTensor(
-    api::Context* const context,
-    const std::vector<int64_t>& sizes,
-    double q_scale,
-    int64_t q_zero_point,
-    const api::ScalarType dtype,
-    const api::StorageType storage_type,
-    const api::GPUMemoryLayout memory_layout)
-    : dtype_(dtype),
-      memory_layout_(memory_layout),
-      // Calculate sizes and strides
-      sizes_(sizes.begin(), sizes.end()),
-      strides_{calc_strides(sizes, memory_layout_, storage_type)},
-      gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
-      gpu_strides_{calc_strides(gpu_sizes_, memory_layout_, storage_type)},
-      virtual_extents_(
-          create_image_extents(gpu_sizes_, storage_type, memory_layout)),
-      // Vulkan uniform buffer containing sizes and stride info
-      cpu_sizes_uniform_(nullptr),
-      gpu_sizes_uniform_(nullptr),
-      extents_uniform_(nullptr),
-      // Quantization params
-      is_quantized_{true},
-      q_scale_{q_scale},
-      q_zero_point_{q_zero_point},
-      // Construct Tensor storage
-      view_(std::make_shared<vTensorStorage>(
-          context,
-          storage_type,
-          memory_layout_,
-          gpu_sizes_,
-          dtype_)) {}
-
 api::VulkanImage& vTensor::image(
     api::PipelineBarrier& pipeline_barrier,
-    const api::PipelineStageFlags stage) const& {
-  view_->transition(pipeline_barrier, stage, api::MemoryAccessType::READ);
-  return view_->image_;
+    const api::PipelineStageFlags stage) & {
+  storage_.transition(pipeline_barrier, stage, api::MemoryAccessType::READ);
+  return storage_.image_;
 }
 
 api::VulkanImage& vTensor::image(
     api::PipelineBarrier& pipeline_barrier,
     const api::PipelineStageFlags stage,
     const api::MemoryAccessFlags access) & {
-  view_->transition(pipeline_barrier, stage, access);
-  return view_->image_;
+  storage_.transition(pipeline_barrier, stage, access);
+  return storage_.image_;
 }
 
 api::VulkanBuffer& vTensor::buffer(
     api::PipelineBarrier& pipeline_barrier,
-    const api::PipelineStageFlags stage) const& {
-  view_->transition(pipeline_barrier, stage, api::MemoryAccessType::READ);
-  return view_->buffer_;
+    const api::PipelineStageFlags stage) & {
+  storage_.transition(pipeline_barrier, stage, api::MemoryAccessType::READ);
+  return storage_.buffer_;
 }
 
 api::VulkanBuffer& vTensor::buffer(
     api::PipelineBarrier& pipeline_barrier,
     const api::PipelineStageFlags stage,
     const api::MemoryAccessFlags access) & {
-  view_->transition(pipeline_barrier, stage, access);
-  return view_->buffer_;
+  storage_.transition(pipeline_barrier, stage, access);
+  return storage_.buffer_;
 }
 
 std::shared_ptr<api::UniformParamsBuffer> vTensor::cpu_sizes_ubo() {
   if (!cpu_sizes_uniform_) {
     cpu_sizes_uniform_.reset(new api::UniformParamsBuffer(
-        view_->context_, api::utils::make_whcn_ivec4(sizes_)));
+        storage_.context_, api::utils::make_whcn_ivec4(sizes_)));
   }
   return cpu_sizes_uniform_;
 }
 
 std::shared_ptr<api::UniformParamsBuffer> vTensor::gpu_sizes_ubo() {
   if (!gpu_sizes_uniform_) {
     gpu_sizes_uniform_.reset(new api::UniformParamsBuffer(
-        view_->context_, api::utils::make_whcn_ivec4(gpu_sizes_)));
+        storage_.context_, api::utils::make_whcn_ivec4(gpu_sizes_)));
   }
   return gpu_sizes_uniform_;
 }
 
 std::shared_ptr<api::UniformParamsBuffer> vTensor::extents_ubo() {
   if (!extents_uniform_) {
     extents_uniform_.reset(new api::UniformParamsBuffer(
-        view_->context_,
+        storage_.context_,
         api::utils::uvec4(
-            {view_->extents_.data[0],
-             view_->extents_.data[1],
-             view_->extents_.data[2],
+            {storage_.extents_.data[0],
+             storage_.extents_.data[1],
+             storage_.extents_.data[2],
              1u})));
   }
   return extents_uniform_;
@@ -332,41 +221,41 @@ std::shared_ptr<api::UniformParamsBuffer> vTensor::extents_ubo() {
 VmaAllocationCreateInfo vTensor::get_allocation_create_info() const {
   switch (storage_type()) {
     case api::kBuffer:
-      return view_->buffer_.allocation_create_info();
+      return storage_.buffer_.allocation_create_info();
    case api::kTexture2D:
    case api::kTexture3D:
-      return view_->image_.allocation_create_info();
+      return storage_.image_.allocation_create_info();
  }
  return {};
 }
 
 VkMemoryRequirements vTensor::get_memory_requirements() const {
   switch (storage_type()) {
     case api::kBuffer:
-      return view_->buffer_.get_memory_requirements();
+      return storage_.buffer_.get_memory_requirements();
    case api::kTexture2D:
    case api::kTexture3D:
-      return view_->image_.get_memory_requirements();
+      return storage_.image_.get_memory_requirements();
  }
  return {};
 }
 
 void vTensor::bind_allocation(const api::MemoryAllocation& allocation) {
   switch (storage_type()) {
     case api::kBuffer:
-      view_->buffer_.bind_allocation(allocation);
+      storage_.buffer_.bind_allocation(allocation);
      break;
    case api::kTexture2D:
    case api::kTexture3D:
-      view_->image_.bind_allocation(allocation);
+      storage_.image_.bind_allocation(allocation);
      break;
  }
 }
 
 void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
   sizes_ = new_sizes;
   gpu_sizes_ = calc_gpu_sizes(sizes_, memory_layout_, storage_type());
-  virtual_extents_ =
+  api::utils::uvec3 virtual_extents =
       create_image_extents(gpu_sizes_, storage_type(), memory_layout_);
 
   if (cpu_sizes_uniform_) {
@@ -379,47 +268,23 @@ void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
 
   if (extents_uniform_) {
     extents_uniform_->update(api::utils::uvec4(
-        {virtual_extents_.data[0],
-         virtual_extents_.data[1],
-         virtual_extents_.data[2],
+        {virtual_extents.data[0],
+         virtual_extents.data[1],
+         virtual_extents.data[2],
          1u}));
   }
 }
 
 void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
   update_size_metadata(new_sizes);
-  view_->discard_and_reallocate(
+  storage_.discard_and_reallocate(
       calc_gpu_sizes(new_sizes, memory_layout_, storage_type()),
       memory_layout_,
       dtype_);
 }
 
 void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
   update_size_metadata(new_sizes);
-  if (storage_type() == api::kBuffer) {
-    if (gpu_nbytes() > view_->buffer_.mem_size()) {
-      VK_THROW(
-          "Cannot virtual_resize a vTensor with sizes that require a larger "
-          "buffer! reallocate() should be used instead.");
-    }
-  } else {
-    bool valid_resize = true;
-    if (virtual_extents_.data[0] > view_->extents_.data[0]) {
-      valid_resize = false;
-    }
-    if (virtual_extents_.data[1] > view_->extents_.data[1]) {
-      valid_resize = false;
-    }
-    if (virtual_extents_.data[2] > view_->extents_.data[2]) {
-      valid_resize = false;
-    }
-
-    if (!valid_resize) {
-      VK_THROW(
-          "Cannot virtual_resize a vTensor with sizes that require a larger "
-          "image texture! reallocate() should be used instead.");
-    }
-  }
 }
 
 //
@@ -442,7 +307,7 @@ api::VulkanImage allocate_image(
   };
 
   VkImageType image_type = VK_IMAGE_TYPE_3D;
-  VkImageViewType image_view_type = VK_IMAGE_VIEW_TYPE_3D;
+  VkImageViewType image_view_type;
 
   switch (storage_type) {
     case api::kTexture3D:
@@ -584,39 +449,6 @@ void vTensorStorage::transition(
   last_access_.access = cur_access;
 }
 
-void add_buffer_barrier(
-    api::PipelineBarrier& pipeline_barrier,
-    const api::VulkanBuffer& buffer,
-    const api::PipelineStageFlags prev_stage,
-    const api::MemoryAccessFlags prev_access,
-    const api::PipelineStageFlags cur_stage,
-    const api::MemoryAccessFlags cur_access) {
-  // Check for RAW
-  const bool read_requested = (cur_access & api::MemoryAccessType::READ) != 0;
-  const bool prev_written = (prev_access & api::MemoryAccessType::WRITE) != 0;
-
-  const bool is_RAW = read_requested && prev_written;
-
-  if (is_RAW) {
-    VkPipelineStageFlags src_stage = api::vk_stage(prev_stage);
-    if (0u == src_stage) {
-      src_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
-    }
-    VkPipelineStageFlags dst_stage = api::vk_stage(cur_stage);
-    if (0u == dst_stage) {
-      dst_stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
-    }
-
-    pipeline_barrier.stage.src |= src_stage;
-    pipeline_barrier.stage.dst |= dst_stage;
-
-    pipeline_barrier.buffers.emplace_back(
-        api::vk_access(prev_stage, prev_access),
-        api::vk_access(cur_stage, cur_access),
-        buffer);
-  }
-}
-
 void vTensorStorage::discard_and_reallocate(
     const std::vector<int64_t>& gpu_sizes,
     const api::GPUMemoryLayout gpu_memory_layout,
```
