Skip to content

Commit c5294c0

Browse files
committed
Update on "[executorch] Custom op for fast hadamard transform kernel"
Custom op support for Fast Hadamard Transform. Differential Revision: [D60530438](https://our.internmc.facebook.com/intern/diff/D60530438/) [ghstack-poisoned]
2 parents cf0f7d6 + a06239a commit c5294c0

File tree

66 files changed

+2240
-731
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+2240
-731
lines changed

.github/workflows/android-perf.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,8 @@ jobs:
220220
device: ${{ fromJson(needs.set-parameters.outputs.devices) }}
221221
fail-fast: false
222222
with:
223+
# Due to scheduling delays, a job may be pushed beyond the default 60m threshold
224+
timeout: 120
223225
device-type: android
224226
runner: linux.2xlarge
225227
test-infra-ref: ''

.github/workflows/apple-perf.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,8 @@ jobs:
292292
device: ${{ fromJson(needs.set-parameters.outputs.devices) }}
293293
fail-fast: false
294294
with:
295+
# Due to scheduling delays, a job may be pushed beyond the default 60m threshold
296+
timeout: 120
295297
device-type: ios
296298
# For iOS testing, the runner just needs to call AWS Device Farm, so there is no need to run this on macOS
297299
runner: linux.2xlarge

backends/vulkan/runtime/api/containers/StagingBuffer.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ class StagingBuffer final {
2727
size_t nbytes_;
2828
vkapi::VulkanBuffer vulkan_buffer_;
2929

30+
void* mapped_data_;
31+
3032
public:
3133
StagingBuffer(
3234
Context* context_p,
@@ -37,7 +39,8 @@ class StagingBuffer final {
3739
numel_(numel),
3840
nbytes_(element_size(dtype_) * numel_),
3941
vulkan_buffer_(
40-
context_p_->adapter_ptr()->vma().create_staging_buffer(nbytes_)) {}
42+
context_p_->adapter_ptr()->vma().create_staging_buffer(nbytes_)),
43+
mapped_data_(nullptr) {}
4144

4245
StagingBuffer(const StagingBuffer&) = delete;
4346
StagingBuffer& operator=(const StagingBuffer&) = delete;
@@ -58,7 +61,10 @@ class StagingBuffer final {
5861
}
5962

6063
inline void* data() {
61-
return vulkan_buffer_.allocation_info().pMappedData;
64+
if (!mapped_data_) {
65+
mapped_data_ = vulkan_buffer_.allocation_info().pMappedData;
66+
}
67+
return mapped_data_;
6268
}
6369

6470
inline size_t numel() {

backends/vulkan/runtime/api/containers/Tensor.cpp

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -436,17 +436,6 @@ size_t vTensor::staging_buffer_numel() const {
436436
return padded_numel_;
437437
}
438438

439-
VmaAllocationCreateInfo vTensor::get_allocation_create_info() const {
440-
switch (storage_type()) {
441-
case utils::kBuffer:
442-
return storage_.buffer_.allocation_create_info();
443-
case utils::kTexture2D:
444-
case utils::kTexture3D:
445-
return storage_.image_.allocation_create_info();
446-
}
447-
return {};
448-
}
449-
450439
VkMemoryRequirements vTensor::get_memory_requirements() const {
451440
switch (storage_type()) {
452441
case utils::kBuffer:
@@ -567,6 +556,48 @@ void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
567556
update_metadata();
568557
}
569558

559+
/*
 * Transposes a dim order vector in-place.
 *
 * Transposing the dim order is a bit unintuitive: dim0 and dim1 have swapped
 * their "identities", so the *values* dim0 and dim1 must be exchanged wherever
 * they appear in the dim order vector. Contrast this with transposing a
 * `sizes` vector, where the *elements at positions* dim0 and dim1 are swapped.
 *
 * Example: dim_order = {2, 0, 1}, dim0 = 0, dim1 = 2  ->  {0, 2, 1}
 *
 * Entries equal to neither dim0 nor dim1 are left untouched, so calling this
 * with dim0 == dim1, or on an empty vector, is a no-op.
 */
void transpose_dim_order_inplace(
    std::vector<int64_t>& dim_order,
    const int64_t dim0,
    const int64_t dim1) {
  // Iterate by reference rather than by `int` index: this avoids comparing a
  // signed index against the unsigned `dim_order.size()`.
  for (int64_t& dim : dim_order) {
    if (dim == dim0) {
      dim = dim1;
    } else if (dim == dim1) {
      dim = dim0;
    }
  }
}
577+
578+
void vTensor::virtual_transpose(const int64_t dim0, const int64_t dim1) {
579+
std::iter_swap(sizes_.begin() + dim0, sizes_.begin() + dim1);
580+
if (storage_type() == utils::kBuffer) {
581+
transpose_dim_order_inplace(dim_order_, dim0, dim1);
582+
} else {
583+
const int dim0_whcn = sizes_.size() - 1 - dim0;
584+
const int dim1_whcn = sizes_.size() - 1 - dim1;
585+
// Cannot transpose batch dimension for texture storage
586+
VK_CHECK_COND(dim0_whcn < 3 && dim1_whcn < 3);
587+
588+
std::iter_swap(
589+
axis_map_.begin() + dim0_whcn, axis_map_.begin() + dim1_whcn);
590+
591+
if (packed_dim_whcn_idx() == dim0_whcn) {
592+
memory_layout_ = utils::GPUMemoryLayout(dim1_whcn);
593+
}
594+
if (packed_dim_whcn_idx() == dim1_whcn) {
595+
memory_layout_ = utils::GPUMemoryLayout(dim0_whcn);
596+
}
597+
}
598+
update_metadata();
599+
}
600+
570601
void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
571602
sizes_ = new_sizes;
572603
update_metadata();

backends/vulkan/runtime/api/containers/Tensor.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,11 @@ class vTensor final {
530530
*/
531531
void virtual_resize(const std::vector<int64_t>& new_sizes);
532532

533+
/*
534+
* Transpose the tensor in-place by updating its metadata.
535+
*/
536+
void virtual_transpose(const int64_t dim0, const int64_t dim1);
537+
533538
/*
534539
* Discard the underlying VkImage or VkBuffer and re-allocate based on new
535540
* tensor sizes

backends/vulkan/runtime/graph/containers/SharedObject.cpp

Lines changed: 5 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,47 +15,27 @@ namespace vkcompute {
1515
void SharedObject::add_user(ComputeGraph* const graph, const ValueRef idx) {
1616
vTensorPtr t = graph->get_tensor(idx);
1717

18-
//
1918
// Aggregate Memory Requirements
20-
//
21-
2219
const VkMemoryRequirements mem_reqs = t->get_memory_requirements();
2320
aggregate_memory_requirements.size =
2421
std::max(mem_reqs.size, aggregate_memory_requirements.size);
2522
aggregate_memory_requirements.alignment =
2623
std::max(mem_reqs.alignment, aggregate_memory_requirements.alignment);
2724
aggregate_memory_requirements.memoryTypeBits |= mem_reqs.memoryTypeBits;
2825

29-
//
30-
// Aggregate Allocation Create Info
31-
//
32-
33-
const VmaAllocationCreateInfo create_info = t->get_allocation_create_info();
34-
// Clear out CREATE_STRATEGY bit flags in case of conflict
35-
VmaAllocationCreateFlags clear_mask = ~VMA_ALLOCATION_CREATE_STRATEGY_MASK;
36-
VmaAllocationCreateFlags create_flags = create_info.flags & clear_mask;
37-
// Use the default allocation strategy
38-
aggregate_create_info.flags =
39-
create_flags | vkapi::DEFAULT_ALLOCATION_STRATEGY;
40-
41-
// Set the usage flag if it is currently not set
42-
if (aggregate_create_info.usage == VMA_MEMORY_USAGE_UNKNOWN) {
43-
aggregate_create_info.usage = create_info.usage;
44-
}
45-
// Otherwise check that there is no conflict regarding usage
46-
VK_CHECK_COND(aggregate_create_info.usage == create_info.usage);
47-
aggregate_create_info.requiredFlags |= create_info.requiredFlags;
48-
aggregate_create_info.preferredFlags |= create_info.preferredFlags;
49-
5026
users.emplace_back(idx);
5127
}
5228

5329
void SharedObject::allocate(ComputeGraph* const graph) {
5430
if (aggregate_memory_requirements.size == 0) {
5531
return;
5632
}
33+
34+
VmaAllocationCreateInfo alloc_create_info =
35+
graph->context()->adapter_ptr()->vma().gpuonly_resource_create_info();
36+
5737
allocation = graph->context()->adapter_ptr()->vma().create_allocation(
58-
aggregate_memory_requirements, aggregate_create_info);
38+
aggregate_memory_requirements, alloc_create_info);
5939
}
6040

6141
void SharedObject::bind_users(ComputeGraph* const graph) {

backends/vulkan/runtime/graph/containers/SharedObject.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ struct SharedObject {
2828
explicit SharedObject() = default;
2929

3030
VkMemoryRequirements aggregate_memory_requirements;
31-
VmaAllocationCreateInfo aggregate_create_info;
3231
std::vector<ValueRef> users;
3332
vkapi::Allocation allocation;
3433

backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ ivec3 to_texture_pos(
270270

271271
// Adjust batch dim if needed
272272
if (sizes.w > 1) {
273-
pos[axis_map[axis_map[3]]] += idx.w * sizes.w;
273+
pos[axis_map[axis_map.w]] += idx.w * sizes[axis_map.w];
274274
}
275275

276276
// Adjust packed dim. Moving 1 texel unit along the packed dim traverses 4
@@ -317,7 +317,7 @@ ivec4 to_texture_elem_pos(
317317

318318
// Adjust batch dim if needed
319319
if (sizes.w > 1) {
320-
pos[axis_map[axis_map[3]]] += idx.w * sizes.w;
320+
pos[axis_map[axis_map.w]] += idx.w * sizes[axis_map.w];
321321
}
322322

323323
// Adjust packed dim. Moving 1 texel unit along the packed dim traverses 4

backends/vulkan/runtime/vk_api/memory/Allocation.cpp

Lines changed: 5 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -26,58 +26,37 @@ namespace vkcompute {
2626
namespace vkapi {
2727

2828
Allocation::Allocation()
29-
: memory_requirements{},
30-
create_info{},
31-
allocator(VK_NULL_HANDLE),
32-
allocation(VK_NULL_HANDLE),
33-
allocation_info({}),
34-
is_copy_(false) {}
29+
: allocator(VK_NULL_HANDLE), allocation(VK_NULL_HANDLE), is_copy_(false) {}
3530

3631
Allocation::Allocation(
3732
VmaAllocator vma_allocator,
3833
const VkMemoryRequirements& mem_props,
3934
const VmaAllocationCreateInfo& create_info)
40-
: memory_requirements(mem_props),
41-
create_info(create_info),
42-
allocator(vma_allocator),
43-
allocation(VK_NULL_HANDLE),
44-
allocation_info({}),
45-
is_copy_(false) {
35+
: allocator(vma_allocator), allocation(VK_NULL_HANDLE), is_copy_(false) {
4636
VK_CHECK(vmaAllocateMemory(
47-
allocator, &memory_requirements, &create_info, &allocation, nullptr));
37+
allocator, &mem_props, &create_info, &allocation, nullptr));
4838
}
4939

5040
Allocation::Allocation(const Allocation& other) noexcept
51-
: memory_requirements(other.memory_requirements),
52-
create_info(other.create_info),
53-
allocator(other.allocator),
41+
: allocator(other.allocator),
5442
allocation(other.allocation),
55-
allocation_info(other.allocation_info),
5643
is_copy_(true) {}
5744

5845
Allocation::Allocation(Allocation&& other) noexcept
59-
: memory_requirements(other.memory_requirements),
60-
create_info(other.create_info),
61-
allocator(other.allocator),
46+
: allocator(other.allocator),
6247
allocation(other.allocation),
63-
allocation_info(other.allocation_info),
6448
is_copy_(other.is_copy_) {
6549
other.allocation = VK_NULL_HANDLE;
66-
other.allocation_info = {};
6750
}
6851

6952
Allocation& Allocation::operator=(Allocation&& other) noexcept {
7053
VmaAllocation tmp_allocation = allocation;
7154

72-
memory_requirements = other.memory_requirements;
73-
create_info = other.create_info;
7455
allocator = other.allocator;
7556
allocation = other.allocation;
76-
allocation_info = other.allocation_info;
7757
is_copy_ = other.is_copy_;
7858

7959
other.allocation = tmp_allocation;
80-
other.allocation_info = {};
8160

8261
return *this;
8362
}

backends/vulkan/runtime/vk_api/memory/Allocation.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,10 @@ struct Allocation final {
5555

5656
~Allocation();
5757

58-
VkMemoryRequirements memory_requirements;
59-
// The properties this allocation was created with
60-
VmaAllocationCreateInfo create_info;
6158
// The allocator object this was allocated from
6259
VmaAllocator allocator;
6360
// Handles to the allocated memory
6461
VmaAllocation allocation;
65-
// Information about the allocated memory
66-
VmaAllocationInfo allocation_info;
6762

6863
private:
6964
// Indicates whether this class instance is a copy of another class instance,

backends/vulkan/runtime/vk_api/memory/Allocator.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@ Allocator::~Allocator() {
5858
vmaDestroyAllocator(allocator_);
5959
}
6060

61+
/*
 * Returns the allocation create info shared by the image and storage buffer
 * creation paths: `flags` is DEFAULT_ALLOCATION_STRATEGY, `usage` is
 * VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, and every other field is
 * zero-initialized.
 */
VmaAllocationCreateInfo Allocator::gpuonly_resource_create_info() {
  VmaAllocationCreateInfo gpu_only_info{};
  gpu_only_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
  gpu_only_info.flags = DEFAULT_ALLOCATION_STRATEGY;
  return gpu_only_info;
}
67+
6168
Allocation Allocator::create_allocation(
6269
const VkMemoryRequirements& memory_requirements,
6370
const VmaAllocationCreateInfo& create_info) {
@@ -103,9 +110,7 @@ VulkanImage Allocator::create_image(
103110
(VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
104111
}
105112

106-
VmaAllocationCreateInfo alloc_create_info = {};
107-
alloc_create_info.flags = DEFAULT_ALLOCATION_STRATEGY;
108-
alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
113+
VmaAllocationCreateInfo alloc_create_info = gpuonly_resource_create_info();
109114

110115
const VulkanImage::ImageProperties image_props{
111116
image_type,
@@ -157,10 +162,7 @@ VulkanBuffer Allocator::create_storage_buffer(
157162
const bool allocate_memory) {
158163
const VkBufferUsageFlags buffer_usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
159164

160-
VmaAllocationCreateInfo alloc_create_info = {};
161-
alloc_create_info.flags = DEFAULT_ALLOCATION_STRATEGY;
162-
alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
163-
165+
VmaAllocationCreateInfo alloc_create_info = gpuonly_resource_create_info();
164166
return VulkanBuffer(
165167
allocator_, size, alloc_create_info, buffer_usage, allocate_memory);
166168
}

backends/vulkan/runtime/vk_api/memory/Allocator.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ class Allocator final {
4848
VmaAllocator allocator_;
4949

5050
public:
51+
VmaAllocationCreateInfo gpuonly_resource_create_info();
52+
5153
Allocation create_allocation(
5254
const VkMemoryRequirements& memory_requirements,
5355
const VmaAllocationCreateInfo& create_info);

backends/vulkan/runtime/vk_api/memory/Buffer.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,14 @@ VulkanBuffer::VulkanBuffer(
5858
nullptr, // pQueueFamilyIndices
5959
};
6060

61-
memory_.create_info = allocation_create_info;
62-
6361
if (allocate_memory) {
6462
VK_CHECK(vmaCreateBuffer(
6563
allocator_,
6664
&buffer_create_info,
6765
&allocation_create_info,
6866
&handle_,
6967
&(memory_.allocation),
70-
&(memory_.allocation_info)));
68+
nullptr));
7169
} else {
7270
VmaAllocatorInfo allocator_info{};
7371
vmaGetAllocatorInfo(allocator_, &allocator_info);
@@ -137,6 +135,12 @@ VulkanBuffer::~VulkanBuffer() {
137135
}
138136
}
139137

138+
/*
 * Queries the allocator for information about the memory currently backing
 * this buffer and returns it by value.
 *
 * If no allocation is attached (`memory_.allocation` is VK_NULL_HANDLE, e.g.
 * the buffer was constructed with `allocate_memory == false`), a
 * zero-initialized struct is returned instead of querying the allocator, so
 * fields such as `pMappedData` read as null.
 */
VmaAllocationInfo VulkanBuffer::allocation_info() const {
  // Value-initialize so callers never observe indeterminate fields.
  VmaAllocationInfo info{};
  // vmaGetAllocationInfo must not be handed a null allocation handle.
  if (memory_.allocation != VK_NULL_HANDLE) {
    vmaGetAllocationInfo(allocator_, memory_.allocation, &info);
  }
  return info;
}
143+
140144
VkMemoryRequirements VulkanBuffer::get_memory_requirements() const {
141145
VkMemoryRequirements memory_requirements;
142146
vkGetBufferMemoryRequirements(this->device(), handle_, &memory_requirements);

backends/vulkan/runtime/vk_api/memory/Buffer.h

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -114,13 +114,7 @@ class VulkanBuffer final {
114114
return memory_.allocation;
115115
}
116116

117-
inline VmaAllocationInfo allocation_info() const {
118-
return memory_.allocation_info;
119-
}
120-
121-
inline VmaAllocationCreateInfo allocation_create_info() const {
122-
return VmaAllocationCreateInfo(memory_.create_info);
123-
}
117+
VmaAllocationInfo allocation_info() const;
124118

125119
inline VkBuffer handle() const {
126120
return handle_;

backends/vulkan/runtime/vk_api/memory/Image.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,6 @@ VulkanImage::VulkanImage(
159159
layout_, // initialLayout
160160
};
161161

162-
memory_.create_info = allocation_create_info;
163-
164162
if (allocate_memory) {
165163
VK_CHECK(vmaCreateImage(
166164
allocator_,

backends/vulkan/runtime/vk_api/memory/Image.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -169,10 +169,6 @@ class VulkanImage final {
169169
return memory_.allocation;
170170
}
171171

172-
inline VmaAllocationCreateInfo allocation_create_info() const {
173-
return VmaAllocationCreateInfo(memory_.create_info);
174-
}
175-
176172
inline VkFormat format() const {
177173
return image_properties_.image_format;
178174
}

0 commit comments

Comments
 (0)