Skip to content

Commit a527dd1

Browse files
authored
[SYCL][UR][L0] Replace memory type look-up with UMF tracking (#10807)
This also makes SharedReadOnly allocations tracking obsolete.
1 parent 37f0be0 commit a527dd1

File tree

6 files changed

+241
-222
lines changed

6 files changed

+241
-222
lines changed

sycl/plugins/unified_runtime/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ if (NOT DEFINED UNIFIED_RUNTIME_LIBRARY OR NOT DEFINED UNIFIED_RUNTIME_INCLUDE_D
66
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
77
set(UNIFIED_RUNTIME_TAG b3cc9ae3f99ca7faff1ba765dd36652fef2cfddd)
88

9+
set(UMF_ENABLE_POOL_TRACKING ON)
910
message(STATUS "Will fetch Unified Runtime from ${UNIFIED_RUNTIME_REPO}")
1011
FetchContent_Declare(unified-runtime
1112
GIT_REPOSITORY ${UNIFIED_RUNTIME_REPO}
@@ -46,7 +47,6 @@ if (NOT DEFINED UNIFIED_RUNTIME_LIBRARY OR NOT DEFINED UNIFIED_RUNTIME_INCLUDE_D
4647
set(UNIFIED_RUNTIME_INCLUDE_DIR "${UNIFIED_RUNTIME_SOURCE_DIR}/include")
4748
endif()
4849

49-
5050
add_library(UnifiedRuntime-Headers INTERFACE)
5151

5252
target_include_directories(UnifiedRuntime-Headers

sycl/plugins/unified_runtime/ur/adapters/level_zero/context.cpp

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ ur_result_t ur_context_handle_t_::initialize() {
182182
// Note that the CCS devices and their respective subdevices share a
183183
// common ze_device_handle and therefore, also share USM allocators.
184184
auto createUSMAllocators = [this](ur_device_handle_t Device) {
185-
auto MemProvider = umf::memoryProviderMakeUnique<USMDeviceMemoryProvider>(
185+
auto MemProvider = umf::memoryProviderMakeUnique<L0DeviceMemoryProvider>(
186186
reinterpret_cast<ur_context_handle_t>(this), Device)
187187
.second;
188188
DeviceMemPools.emplace(
@@ -193,7 +193,7 @@ ur_result_t ur_context_handle_t_::initialize() {
193193
.Configs[usm::DisjointPoolMemType::Device])
194194
.second));
195195

196-
MemProvider = umf::memoryProviderMakeUnique<USMSharedMemoryProvider>(
196+
MemProvider = umf::memoryProviderMakeUnique<L0SharedMemoryProvider>(
197197
reinterpret_cast<ur_context_handle_t>(this), Device)
198198
.second;
199199
SharedMemPools.emplace(
@@ -204,10 +204,9 @@ ur_result_t ur_context_handle_t_::initialize() {
204204
.Configs[usm::DisjointPoolMemType::Shared])
205205
.second));
206206

207-
MemProvider =
208-
umf::memoryProviderMakeUnique<USMSharedReadOnlyMemoryProvider>(
209-
reinterpret_cast<ur_context_handle_t>(this), Device)
210-
.second;
207+
MemProvider = umf::memoryProviderMakeUnique<L0SharedReadOnlyMemoryProvider>(
208+
reinterpret_cast<ur_context_handle_t>(this), Device)
209+
.second;
211210
SharedReadOnlyMemPools.emplace(
212211
std::piecewise_construct, std::make_tuple(Device->ZeDevice),
213212
std::make_tuple(
@@ -216,6 +215,33 @@ ur_result_t ur_context_handle_t_::initialize() {
216215
DisjointPoolConfigInstance
217216
.Configs[usm::DisjointPoolMemType::SharedReadOnly])
218217
.second));
218+
219+
MemProvider = umf::memoryProviderMakeUnique<L0DeviceMemoryProvider>(
220+
reinterpret_cast<ur_context_handle_t>(this), Device)
221+
.second;
222+
DeviceMemProxyPools.emplace(
223+
std::piecewise_construct, std::make_tuple(Device->ZeDevice),
224+
std::make_tuple(
225+
umf::poolMakeUnique<USMProxyPool, 1>({std::move(MemProvider)})
226+
.second));
227+
228+
MemProvider = umf::memoryProviderMakeUnique<L0SharedMemoryProvider>(
229+
reinterpret_cast<ur_context_handle_t>(this), Device)
230+
.second;
231+
SharedMemProxyPools.emplace(
232+
std::piecewise_construct, std::make_tuple(Device->ZeDevice),
233+
std::make_tuple(
234+
umf::poolMakeUnique<USMProxyPool, 1>({std::move(MemProvider)})
235+
.second));
236+
237+
MemProvider = umf::memoryProviderMakeUnique<L0SharedReadOnlyMemoryProvider>(
238+
reinterpret_cast<ur_context_handle_t>(this), Device)
239+
.second;
240+
SharedReadOnlyMemProxyPools.emplace(
241+
std::piecewise_construct, std::make_tuple(Device->ZeDevice),
242+
std::make_tuple(
243+
umf::poolMakeUnique<USMProxyPool, 1>({std::move(MemProvider)})
244+
.second));
219245
};
220246

221247
// Recursive helper to call createUSMAllocators for all sub-devices
@@ -236,7 +262,7 @@ ur_result_t ur_context_handle_t_::initialize() {
236262
// Create USM pool for host. Device and Shared USM allocations
237263
// are device-specific. Host allocations are not device-dependent therefore
238264
// we don't need a map with device as key.
239-
auto MemProvider = umf::memoryProviderMakeUnique<USMHostMemoryProvider>(
265+
auto MemProvider = umf::memoryProviderMakeUnique<L0HostMemoryProvider>(
240266
reinterpret_cast<ur_context_handle_t>(this), nullptr)
241267
.second;
242268
HostMemPool =
@@ -245,6 +271,12 @@ ur_result_t ur_context_handle_t_::initialize() {
245271
DisjointPoolConfigInstance.Configs[usm::DisjointPoolMemType::Host])
246272
.second;
247273

274+
MemProvider = umf::memoryProviderMakeUnique<L0HostMemoryProvider>(
275+
reinterpret_cast<ur_context_handle_t>(this), nullptr)
276+
.second;
277+
HostMemProxyPool =
278+
umf::poolMakeUnique<USMProxyPool, 1>({std::move(MemProvider)}).second;
279+
248280
// We may allocate memory to this root device so create allocators.
249281
if (SingleRootDevice &&
250282
DeviceMemPools.find(SingleRootDevice->ZeDevice) == DeviceMemPools.end()) {

sycl/plugins/unified_runtime/ur/adapters/level_zero/context.hpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -101,14 +101,18 @@ struct ur_context_handle_t_ : _ur_object {
101101
std::unordered_map<ze_device_handle_t, umf::pool_unique_handle_t>
102102
SharedReadOnlyMemPools;
103103

104-
// Since the L0 native runtime does not distinguish "shared device_read_only"
105-
// vs regular "shared" allocations, we have to keep track of it to use
106-
// proper memory pool when freeing allocations.
107-
std::unordered_set<void *> SharedReadOnlyAllocs;
108-
109104
// Store the host memory pool. It does not depend on any device.
110105
umf::pool_unique_handle_t HostMemPool;
111106

107+
// Allocation-tracking proxy pools for direct allocations. No pooling used.
108+
std::unordered_map<ze_device_handle_t, umf::pool_unique_handle_t>
109+
DeviceMemProxyPools;
110+
std::unordered_map<ze_device_handle_t, umf::pool_unique_handle_t>
111+
SharedMemProxyPools;
112+
std::unordered_map<ze_device_handle_t, umf::pool_unique_handle_t>
113+
SharedReadOnlyMemProxyPools;
114+
umf::pool_unique_handle_t HostMemProxyPool;
115+
112116
// We need to store all memory allocations in the context because there could
113117
// be kernels with indirect access. Kernels with indirect access start to
114118
// reference all existing memory allocations at the time when they are

0 commit comments

Comments
 (0)