Skip to content

Commit a0673d8

Browse files
SS-JIA and facebook-github-bot
authored and committed
Fork Vulkan API into ExecuTorch directory (pytorch#2797)
Summary:
Pull Request resolved: pytorch#2797

## Context

Similar to pytorch#2086, which moved the `ComputeGraph` API to the ExecuTorch repo, this changeset forks the Vulkan Compute API (under `pytorch/aten/src/ATen/native/vulkan/api`) to the ExecuTorch repository. This is critical for development speed, as it allows us to make foundational changes to the Vulkan compute API without:

1. Having to export changes to PyTorch
2. Having to worry about breaking ATen Vulkan

The next change in the stack will deprecate the `at::native::vulkan` namespace now that everything is local to ExecuTorch.

bypass-github-export-checks

Reviewed By: copyrightly

Differential Revision: D55607115
1 parent 6965120 commit a0673d8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+9043
-22
lines changed

backends/vulkan/runtime/api/Adapter.cpp

Lines changed: 449 additions & 0 deletions
Large diffs are not rendered by default.

backends/vulkan/runtime/api/Adapter.h

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once
10+
11+
// @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName
12+
13+
#ifdef USE_VULKAN_API
14+
15+
#include <executorch/backends/vulkan/runtime/api/vk_api.h>
16+
17+
#include <executorch/backends/vulkan/runtime/api/Pipeline.h>
18+
#include <executorch/backends/vulkan/runtime/api/Shader.h>
19+
#include <executorch/backends/vulkan/runtime/api/Utils.h>
20+
21+
#include <array>
22+
#include <mutex>
23+
#include <ostream>
24+
25+
namespace at {
26+
namespace native {
27+
namespace vulkan {
28+
namespace api {
29+
30+
// Value-type bundle describing one Vulkan physical device: the raw handle,
// the property structures reported for it, and a few derived flags. Adapter
// stores one of these by value and exposes several fields through accessors.
//
// All members are public and carry no in-class initializers; the constructor
// is only declared here, so how each field is filled in is not visible in
// this header.
//
// NOTE(review): std::vector is used below, but <vector> is not among this
// header's direct standard includes (<array>, <mutex>, <ostream>) - it
// presumably arrives transitively through the project headers; confirm.
struct PhysicalDevice final {
31+
// Handle
32+
VkPhysicalDevice handle;
33+
34+
// Properties obtained from Vulkan
35+
VkPhysicalDeviceProperties properties;
36+
VkPhysicalDeviceMemoryProperties memory_properties;
37+
// One entry per queue family (presumably in the order Vulkan reports them;
// verify against the constructor).
std::vector<VkQueueFamilyProperties> queue_families;
38+
39+
// Metadata
40+
// Returned by Adapter::num_compute_queues().
uint32_t num_compute_queues;
41+
// Returned by Adapter::has_unified_memory().
bool has_unified_memory;
42+
// Returned by Adapter::timestamp_compute_and_graphics().
bool has_timestamps;
43+
// Returned by Adapter::timestamp_period(). Presumably mirrors the device's
// timestampPeriod limit - TODO confirm in the constructor.
float timestamp_period;
44+
45+
// explicit: a bare VkPhysicalDevice never converts to PhysicalDevice
// implicitly. Defined out of line (not shown here).
explicit PhysicalDevice(VkPhysicalDevice);
46+
};
47+
48+
// Wrapper around a single VkDevice handle with restricted value semantics:
// it can be constructed from a VkDevice and move-constructed, but it can be
// neither copied nor assigned (copy or move).
//
// The destructor is user-declared and defined out of line; presumably it
// releases the wrapped device - confirm in the implementation file.
class DeviceHandle final {
49+
public:
50+
// explicit: no implicit conversion from a raw VkDevice.
explicit DeviceHandle(VkDevice device);
51+
52+
// Copying is disabled, so two wrappers never refer to the same handle via
// a copy.
DeviceHandle(const DeviceHandle&) = delete;
53+
DeviceHandle& operator=(const DeviceHandle&) = delete;
54+
55+
// Move construction is permitted and declared non-throwing; move
// assignment is disabled, so an existing wrapper can never be re-targeted.
DeviceHandle(DeviceHandle&&) noexcept;
56+
DeviceHandle& operator=(DeviceHandle&&) = delete;
57+
58+
~DeviceHandle();
59+
60+
private:
61+
VkDevice handle_;
62+
63+
// Adapter reads handle_ directly (see Adapter::device_handle()).
friend class Adapter;
64+
};
65+
66+
//
67+
// A Vulkan Adapter represents a logical device and all its properties. It
68+
// manages all relevant properties of the underlying physical device, a
69+
// handle to the logical device, and a number of compute queues available to
70+
// the device. It is primarily responsible for managing the VkDevice handle
71+
// which points to the logical device object on the GPU.
72+
//
73+
// This class is primarily used by the Runtime class, which holds one Adapter
74+
// instance for each physical device visible to the VkInstance. Upon
75+
// construction, this class will populate the physical device properties, but
76+
// will not create the logical device until specifically requested via the
77+
// init_device() function.
78+
//
79+
// init_device() will create the logical device and obtain the VkDevice handle
80+
// for it. It will also create a number of compute queues up to the amount
81+
// requested when the Adapter instance was constructed.
82+
//
83+
// Contexts (which represent one thread of execution) will request a compute
84+
// queue from an Adapter. The Adapter will then select a compute queue to
85+
// assign to the Context, attempting to balance load between all available
86+
// queues. This will allow different Contexts (which typically execute on
87+
// separate threads) to run concurrently.
88+
//
89+
90+
// Compile-time size of the Adapter::queue_mutexes_ std::array. How these
// mutexes are mapped onto queues is decided in the implementation file and is
// not visible in this header.
#define NUM_QUEUE_MUTEXES 4
91+
92+
// Owns everything tied to one device: the physical device description, the
// logical device handle, the queue bookkeeping, the device-level caches and
// the memory allocator. Instances can be neither copied nor moved.
//
// Functions that are only declared in this class are defined out of line;
// their behavior is not visible from this header.
class Adapter final {
93+
public:
94+
// physical_device is taken by value. num_queues is the number of queues
// requested (presumably an upper bound on how many compute queues are
// created; confirm in the constructor definition).
explicit Adapter(
95+
VkInstance instance,
96+
PhysicalDevice physical_device,
97+
const uint32_t num_queues);
98+
99+
// Not copyable.
Adapter(const Adapter&) = delete;
100+
Adapter& operator=(const Adapter&) = delete;
101+
102+
// Not movable either, so the address of an Adapter stays fixed for its
// whole lifetime.
Adapter(Adapter&&) = delete;
103+
Adapter& operator=(Adapter&&) = delete;
104+
105+
// Defaulted: teardown is delegated to the data members' own destructors,
// which run in reverse declaration order.
~Adapter() = default;
106+
107+
// Plain record describing one queue handed out by the Adapter.
struct Queue {
108+
uint32_t family_index;
109+
// Presumably the index of the queue within its family; verify against
// the code that fills queues_.
uint32_t queue_index;
110+
VkQueueFlags capabilities;
111+
VkQueue handle;
112+
};
113+
114+
// NOTE: C++ constructs non-static data members in declaration order and
// destroys them in reverse, so the order below is significant. In
// particular device_ is declared ahead of the caches and vma_, which means
// it is constructed before them and destroyed after them. Do not reorder.
private:
115+
// Use a mutex to manage queue usage info since
116+
// it can be accessed from multiple threads
117+
std::mutex queue_usage_mutex_;
118+
// Physical Device Info
119+
PhysicalDevice physical_device_;
120+
// Queue Management
121+
std::vector<Queue> queues_;
122+
// Presumably one usage counter per entry of queues_ - TODO confirm.
std::vector<uint32_t> queue_usage_;
123+
// Fixed-size set of NUM_QUEUE_MUTEXES mutexes.
std::array<std::mutex, NUM_QUEUE_MUTEXES> queue_mutexes_;
124+
// Handles
125+
VkInstance instance_;
126+
DeviceHandle device_;
127+
// Device-level resource caches
128+
ShaderLayoutCache shader_layout_cache_;
129+
ShaderCache shader_cache_;
130+
PipelineLayoutCache pipeline_layout_cache_;
131+
ComputePipelineCache compute_pipeline_cache_;
132+
// Memory Management
133+
SamplerCache sampler_cache_;
134+
MemoryAllocator vma_;
135+
136+
public:
137+
// Physical Device metadata
138+
139+
// Raw physical device handle stored in physical_device_.
inline VkPhysicalDevice physical_handle() const {
140+
return physical_device_.handle;
141+
}
142+
143+
// Raw logical device handle, read straight out of the DeviceHandle
// wrapper (Adapter is a friend of DeviceHandle).
inline VkDevice device_handle() const {
144+
return device_.handle_;
145+
}
146+
147+
inline bool has_unified_memory() const {
148+
return physical_device_.has_unified_memory;
149+
}
150+
151+
inline uint32_t num_compute_queues() const {
152+
return physical_device_.num_compute_queues;
153+
}
154+
155+
// Forwards PhysicalDevice::has_timestamps under a longer name.
inline bool timestamp_compute_and_graphics() const {
156+
return physical_device_.has_timestamps;
157+
}
158+
159+
inline float timestamp_period() const {
160+
return physical_device_.timestamp_period;
161+
}
162+
163+
// Queue Management
164+
165+
// request_queue() returns a Queue by value; return_queue() takes a
// non-const reference to a Queue. Both are defined out of line.
Queue request_queue();
166+
void return_queue(Queue&);
167+
168+
// Caches
169+
170+
// The accessors below return mutable references to members owned by this
// Adapter; the references are only valid while the Adapter is alive.
inline ShaderLayoutCache& shader_layout_cache() {
171+
return shader_layout_cache_;
172+
}
173+
174+
inline ShaderCache& shader_cache() {
175+
return shader_cache_;
176+
}
177+
178+
inline PipelineLayoutCache& pipeline_layout_cache() {
179+
return pipeline_layout_cache_;
180+
}
181+
182+
inline ComputePipelineCache& compute_pipeline_cache() {
183+
return compute_pipeline_cache_;
184+
}
185+
186+
// Memory Allocation
187+
188+
inline SamplerCache& sampler_cache() {
189+
return sampler_cache_;
190+
}
191+
192+
inline MemoryAllocator& vma() {
193+
return vma_;
194+
}
195+
196+
// Command Buffer Submission
197+
198+
// Single command buffer overload. The fence argument is optional and
// defaults to VK_NULL_HANDLE.
void
199+
submit_cmd(const Queue&, VkCommandBuffer, VkFence fence = VK_NULL_HANDLE);
200+
201+
// Multiple command buffer overload; same optional fence argument.
void submit_cmds(
202+
const Adapter::Queue&,
203+
const std::vector<VkCommandBuffer>&,
204+
VkFence fence = VK_NULL_HANDLE);
205+
206+
// Miscellaneous
207+
208+
// Always returns the fixed triple {4, 4, 4}; no device state is consulted.
inline utils::uvec3 local_work_group_size() const {
209+
return {
210+
4u,
211+
4u,
212+
4u,
213+
};
214+
}
215+
216+
// String form of the adapter and a stream-insertion operator for it; both
// are defined out of line.
std::string stringize() const;
217+
friend std::ostream& operator<<(std::ostream&, const Adapter&);
218+
};
219+
220+
} // namespace api
221+
} // namespace vulkan
222+
} // namespace native
223+
} // namespace at
224+
225+
#endif /* USE_VULKAN_API */
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
// VMA_IMPLEMENTATION must be defined before the include below: Allocator.h
// pulls in vk_mem_alloc.h, and per the VMA documentation that single-header
// library emits its function definitions only into a translation unit that
// defines this macro first. Keep the define above the include.
#define VMA_IMPLEMENTATION
10+
#include <executorch/backends/vulkan/runtime/api/Allocator.h>
backends/vulkan/runtime/api/Allocator.h

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once
10+
11+
//
12+
// Do NOT include vk_mem_alloc.h directly.
13+
// Always include this file (Allocator.h) instead.
14+
//
15+
16+
#include <executorch/backends/vulkan/runtime/api/vk_api.h>
17+
18+
// Everything below configures and then includes the third-party
// vk_mem_alloc.h header. The VMA_* macros must be defined before that
// include to take effect, so their position relative to it matters.
#ifdef USE_VULKAN_API
19+
20+
// Vulkan version VMA is built against; per the VMA documentation 1000000
// encodes Vulkan 1.0.
#define VMA_VULKAN_VERSION 1000000
21+
22+
// Chooses how VMA obtains Vulkan entry points (see the VMA configuration
// docs): with USE_VULKAN_WRAPPER the statically linked prototypes are
// disabled; otherwise the dynamically fetched ones are disabled.
#ifdef USE_VULKAN_WRAPPER
23+
#define VMA_STATIC_VULKAN_FUNCTIONS 0
24+
#else
25+
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0
26+
#endif /* USE_VULKAN_WRAPPER */
27+
28+
// 32 MiB and 256 MiB respectively, written as unsigned long long products.
#define VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE (32ull * 1024 * 1024)
29+
#define VMA_SMALL_HEAP_MAX_SIZE (256ull * 1024 * 1024)
30+
31+
#define VMA_STATS_STRING_ENABLED 0
32+
33+
// Extra VMA debugging knobs, applied only when VULKAN_DEBUG is defined.
#ifdef VULKAN_DEBUG
34+
#define VMA_DEBUG_ALIGNMENT 4096
35+
#define VMA_DEBUG_ALWAYS_DEDICATED_MEMORY 0
36+
#define VMA_DEBUG_DETECT_CORRUPTION 1
37+
#define VMA_DEBUG_GLOBAL_MUTEX 1
38+
#define VMA_DEBUG_INITIALIZE_ALLOCATIONS 1
39+
#define VMA_DEBUG_MARGIN 64
40+
#define VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY 256
41+
#define VMA_RECORDING_ENABLED 1
42+
43+
// Expands to nothing, so VMA debug log calls are compiled out. A
// printf-based alternative is kept, disabled, in the block comment below.
#define VMA_DEBUG_LOG(format, ...)
44+
/*
45+
#define VMA_DEBUG_LOG(format, ...) do { \
46+
printf(format, __VA_ARGS__); \
47+
printf("\n"); \
48+
} while(false)
49+
*/
50+
#endif /* VULKAN_DEBUG */
51+
52+
// Under clang, suppress two warnings for the duration of the third-party
// include only; the matching pop after the include restores the previous
// diagnostic state.
#ifdef __clang__
53+
#pragma clang diagnostic push
54+
#pragma clang diagnostic ignored "-Wnullability-completeness"
55+
#pragma clang diagnostic ignored "-Wunused-variable"
56+
#endif /* __clang__ */
57+
58+
#include <include/vk_mem_alloc.h>
59+
60+
#ifdef __clang__
61+
#pragma clang diagnostic pop
62+
#endif /* __clang__ */
63+
64+
#endif /* USE_VULKAN_API */

0 commit comments

Comments
 (0)