Skip to content

Commit 591d28b

Browse files
committed
[SYCL][UR][CUDA] Move CUDA device memory pools to the context
Signed-off-by: Lukasz Dorau <[email protected]>
1 parent 9cf5f6f commit 591d28b

File tree

8 files changed

+197
-164
lines changed

8 files changed

+197
-164
lines changed

unified-runtime/source/adapters/cuda/common.cpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99
//===----------------------------------------------------------------------===//
1010

1111
#include "common.hpp"
12+
#include "device.hpp"
1213
#include "logger/ur_logger.hpp"
14+
#include "umf_helpers.hpp"
1315

1416
#include <cuda.h>
1517

@@ -129,3 +131,84 @@ void setPluginSpecificMessage(CUresult cu_res) {
129131
setErrorMessage(message, UR_RESULT_ERROR_ADAPTER_SPECIFIC);
130132
free(message);
131133
}
134+
135+
namespace umf {
136+
137+
// Placeholder for provider-native error handling in the CUDA adapter.
// Both arguments are currently ignored and the function unconditionally
// returns UR_RESULT_ERROR_UNKNOWN.
// NOTE(review): presumably meant to translate a provider-specific error
// (message, code) into a UR result once implemented - confirm against the
// other adapters.
ur_result_t getProviderNativeError(const char *, int32_t) {
138+
// TODO: implement when UMF supports CUDA
139+
return UR_RESULT_ERROR_UNKNOWN;
140+
}
141+
142+
// Create UMF CUDA memory provider for the host memory (UMF_MEMORY_TYPE_HOST)
143+
// from a device
144+
ur_result_t
145+
createHostMemoryProvider(CUcontext contextCUDA,
146+
umf_memory_provider_handle_t *memoryProviderHost) {
147+
*memoryProviderHost = nullptr;
148+
149+
umf_cuda_memory_provider_params_handle_t CUMemoryProviderParams = nullptr;
150+
umf_result_t UmfResult =
151+
umfCUDAMemoryProviderParamsCreate(&CUMemoryProviderParams);
152+
UMF_RETURN_UR_ERROR(UmfResult);
153+
154+
OnScopeExit Cleanup(
155+
[=]() { umfCUDAMemoryProviderParamsDestroy(CUMemoryProviderParams); });
156+
157+
UmfResult =
158+
umf::setCUMemoryProviderParams(CUMemoryProviderParams, 0 /* cuDevice */,
159+
contextCUDA, UMF_MEMORY_TYPE_HOST);
160+
UMF_RETURN_UR_ERROR(UmfResult);
161+
162+
// create UMF CUDA memory provider and pool for the host memory
163+
// (UMF_MEMORY_TYPE_HOST)
164+
UmfResult = umfMemoryProviderCreate(
165+
umfCUDAMemoryProviderOps(), CUMemoryProviderParams, memoryProviderHost);
166+
UMF_RETURN_UR_ERROR(UmfResult);
167+
168+
return UR_RESULT_SUCCESS;
169+
}
170+
171+
// Create UMF CUDA memory providers for the device memory (UMF_MEMORY_TYPE_HOST)
172+
// and the shared memory (UMF_MEMORY_TYPE_SHARED)
173+
ur_result_t createDeviceMemoryProviders(
174+
ur_device_handle_t_ *DeviceHandle,
175+
umf_memory_provider_handle_t *memoryDeviceProvider,
176+
umf_memory_provider_handle_t *memorySharedProvider) {
177+
umf_cuda_memory_provider_params_handle_t CUMemoryProviderParams = nullptr;
178+
179+
umf_result_t UmfResult =
180+
umfCUDAMemoryProviderParamsCreate(&CUMemoryProviderParams);
181+
UMF_RETURN_UR_ERROR(UmfResult);
182+
183+
OnScopeExit Cleanup(
184+
[=]() { umfCUDAMemoryProviderParamsDestroy(CUMemoryProviderParams); });
185+
186+
CUdevice device = DeviceHandle->get();
187+
CUcontext context = DeviceHandle->getNativeContext();
188+
189+
// create UMF CUDA memory provider for the device memory
190+
// (UMF_MEMORY_TYPE_DEVICE)
191+
UmfResult = umf::setCUMemoryProviderParams(CUMemoryProviderParams, device,
192+
context, UMF_MEMORY_TYPE_DEVICE);
193+
UMF_RETURN_UR_ERROR(UmfResult);
194+
195+
*memoryDeviceProvider = nullptr;
196+
UmfResult = umfMemoryProviderCreate(
197+
umfCUDAMemoryProviderOps(), CUMemoryProviderParams, memoryDeviceProvider);
198+
UMF_RETURN_UR_ERROR(UmfResult);
199+
200+
// create UMF CUDA memory provider for the shared memory
201+
// (UMF_MEMORY_TYPE_SHARED)
202+
UmfResult = umf::setCUMemoryProviderParams(CUMemoryProviderParams, device,
203+
context, UMF_MEMORY_TYPE_SHARED);
204+
UMF_RETURN_UR_ERROR(UmfResult);
205+
206+
*memorySharedProvider = nullptr;
207+
UmfResult = umfMemoryProviderCreate(
208+
umfCUDAMemoryProviderOps(), CUMemoryProviderParams, memorySharedProvider);
209+
UMF_RETURN_UR_ERROR(UmfResult);
210+
211+
return UR_RESULT_SUCCESS;
212+
}
213+
214+
} // namespace umf

unified-runtime/source/adapters/cuda/common.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ void assertion(bool Condition, const char *Message = nullptr);
7373

7474
namespace umf {
7575

76+
ur_result_t getProviderNativeError(const char *, int32_t);
77+
7678
inline umf_result_t setCUMemoryProviderParams(
7779
umf_cuda_memory_provider_params_handle_t CUMemoryProviderParams,
7880
int cuDevice, void *cuContext, umf_usm_memory_type_t memType) {
@@ -92,4 +94,13 @@ inline umf_result_t setCUMemoryProviderParams(
9294
return UMF_RESULT_SUCCESS;
9395
}
9496

97+
// Creates a UMF CUDA memory provider for the host memory
// (UMF_MEMORY_TYPE_HOST) using the CUDA context `contextCUDA`.
// `*memoryProviderHost` is reset to nullptr on entry and is the location the
// new provider handle is written to. Returns UR_RESULT_SUCCESS on success.
ur_result_t
98+
createHostMemoryProvider(CUcontext contextCUDA,
99+
umf_memory_provider_handle_t *memoryProviderHost);
100+
101+
// Creates two UMF CUDA memory providers for the device `DeviceHandle`, using
// its CUdevice and native CUDA context: one for the device memory
// (UMF_MEMORY_TYPE_DEVICE), returned through `memoryDeviceProvider`, and one
// for the shared memory (UMF_MEMORY_TYPE_SHARED), returned through
// `memorySharedProvider`. Returns UR_RESULT_SUCCESS on success.
ur_result_t
102+
createDeviceMemoryProviders(ur_device_handle_t_ *DeviceHandle,
103+
umf_memory_provider_handle_t *memoryDeviceProvider,
104+
umf_memory_provider_handle_t *memorySharedProvider);
105+
95106
} // namespace umf

unified-runtime/source/adapters/cuda/context.hpp

Lines changed: 74 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -76,38 +76,6 @@ typedef void (*ur_context_extended_deleter_t)(void *user_data);
7676
///
7777
///
7878

79-
static ur_result_t
80-
CreateHostMemoryProviderPool(ur_device_handle_t_ *DeviceHandle,
81-
umf_memory_provider_handle_t *MemoryProviderHost,
82-
umf_memory_pool_handle_t *MemoryPoolHost) {
83-
84-
*MemoryProviderHost = nullptr;
85-
CUcontext context = DeviceHandle->getNativeContext();
86-
87-
umf_cuda_memory_provider_params_handle_t CUMemoryProviderParams = nullptr;
88-
umf_result_t UmfResult =
89-
umfCUDAMemoryProviderParamsCreate(&CUMemoryProviderParams);
90-
UMF_RETURN_UR_ERROR(UmfResult);
91-
OnScopeExit Cleanup(
92-
[=]() { umfCUDAMemoryProviderParamsDestroy(CUMemoryProviderParams); });
93-
94-
UmfResult = umf::setCUMemoryProviderParams(
95-
CUMemoryProviderParams, 0 /* cuDevice */, context, UMF_MEMORY_TYPE_HOST);
96-
UMF_RETURN_UR_ERROR(UmfResult);
97-
98-
// create UMF CUDA memory provider and pool for the host memory
99-
// (UMF_MEMORY_TYPE_HOST)
100-
UmfResult = umfMemoryProviderCreate(
101-
umfCUDAMemoryProviderOps(), CUMemoryProviderParams, MemoryProviderHost);
102-
UMF_RETURN_UR_ERROR(UmfResult);
103-
104-
UmfResult = umfPoolCreate(umfProxyPoolOps(), *MemoryProviderHost, nullptr, 0,
105-
MemoryPoolHost);
106-
UMF_RETURN_UR_ERROR(UmfResult);
107-
108-
return UR_RESULT_SUCCESS;
109-
}
110-
11179
struct ur_context_handle_t_ {
11280

11381
struct deleter_data {
@@ -120,30 +88,42 @@ struct ur_context_handle_t_ {
12088
std::vector<ur_device_handle_t> Devices;
12189
std::atomic_uint32_t RefCount;
12290

123-
// UMF CUDA memory provider and pool for the host memory
91+
// UMF CUDA memory pool for the host memory
12492
// (UMF_MEMORY_TYPE_HOST)
125-
umf_memory_provider_handle_t MemoryProviderHost = nullptr;
12693
umf_memory_pool_handle_t MemoryPoolHost = nullptr;
12794

95+
// UMF CUDA memory pools for the device memory
96+
// (UMF_MEMORY_TYPE_DEVICE)
97+
std::vector<umf_memory_pool_handle_t> MemoryDevicePools;
98+
99+
// UMF CUDA memory pools for the shared memory
100+
// (UMF_MEMORY_TYPE_SHARED)
101+
std::vector<umf_memory_pool_handle_t> MemorySharedPools;
102+
128103
ur_context_handle_t_(const ur_device_handle_t *Devs, uint32_t NumDevices)
129104
: Devices{Devs, Devs + NumDevices}, RefCount{1} {
130105
for (auto &Dev : Devices) {
131106
urDeviceRetain(Dev);
132107
}
133108

134-
// Create UMF CUDA memory provider for the host memory
135-
// (UMF_MEMORY_TYPE_HOST) from any device (Devices[0] is used here, because
136-
// it is guaranteed to exist).
137-
UR_CHECK_ERROR(CreateHostMemoryProviderPool(Devices[0], &MemoryProviderHost,
138-
&MemoryPoolHost));
109+
// Create UMF CUDA memory provider and pool for the host memory
110+
// (UMF_MEMORY_TYPE_HOST)
111+
UR_CHECK_ERROR(createHostMemoryPool());
112+
113+
// Create UMF CUDA memory providers and pools for the device memory
114+
// (UMF_MEMORY_TYPE_DEVICE) and the shared memory (UMF_MEMORY_TYPE_SHARED).
115+
UR_CHECK_ERROR(createDeviceMemoryPools());
139116
};
140117

141118
~ur_context_handle_t_() {
142119
if (MemoryPoolHost) {
143120
umfPoolDestroy(MemoryPoolHost);
144121
}
145-
if (MemoryProviderHost) {
146-
umfMemoryProviderDestroy(MemoryProviderHost);
122+
for (auto &Pool : MemoryDevicePools) {
123+
umfPoolDestroy(Pool);
124+
}
125+
for (auto &Pool : MemorySharedPools) {
126+
umfPoolDestroy(Pool);
147127
}
148128
for (auto &Dev : Devices) {
149129
urDeviceRelease(Dev);
@@ -190,6 +170,59 @@ struct ur_context_handle_t_ {
190170
std::mutex Mutex;
191171
std::vector<deleter_data> ExtendedDeleters;
192172
std::set<ur_usm_pool_handle_t> PoolHandles;
173+
174+
// Create UMF CUDA memory pool for the host memory (UMF_MEMORY_TYPE_HOST)
175+
ur_result_t createHostMemoryPool() {
176+
umf_memory_provider_handle_t memoryProviderHost = nullptr;
177+
ur_result_t URResult = umf::createHostMemoryProvider(
178+
Devices[0]->getNativeContext(), &memoryProviderHost);
179+
if (URResult != UR_RESULT_SUCCESS) {
180+
return URResult;
181+
}
182+
183+
umf_result_t UmfResult =
184+
umfPoolCreate(umfProxyPoolOps(), memoryProviderHost, nullptr,
185+
UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &MemoryPoolHost);
186+
UMF_RETURN_UR_ERROR(UmfResult);
187+
188+
return UR_RESULT_SUCCESS;
189+
}
190+
191+
// Create UMF CUDA memory pools for the device memory (UMF_MEMORY_TYPE_HOST)
192+
// and the shared memory (UMF_MEMORY_TYPE_SHARED)
193+
ur_result_t createDeviceMemoryPools() {
194+
for (auto &Device : Devices) {
195+
umf_memory_provider_handle_t memoryDeviceProvider = nullptr;
196+
umf_memory_provider_handle_t memorySharedProvider = nullptr;
197+
ur_result_t URResult = umf::createDeviceMemoryProviders(
198+
Device, &memoryDeviceProvider, &memorySharedProvider);
199+
if (URResult != UR_RESULT_SUCCESS) {
200+
return URResult;
201+
}
202+
203+
// create UMF CUDA memory pool for the device memory
204+
// (UMF_MEMORY_TYPE_DEVICE)
205+
umf_result_t UmfResult;
206+
umf_memory_pool_handle_t memoryDevicePool = nullptr;
207+
UmfResult =
208+
umfPoolCreate(umfProxyPoolOps(), memoryDeviceProvider, nullptr,
209+
UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &memoryDevicePool);
210+
UMF_RETURN_UR_ERROR(UmfResult);
211+
212+
// create UMF CUDA memory pool for the shared memory
213+
// (UMF_MEMORY_TYPE_SHARED)
214+
umf_memory_pool_handle_t memorySharedPool = nullptr;
215+
UmfResult =
216+
umfPoolCreate(umfProxyPoolOps(), memorySharedProvider, nullptr,
217+
UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &memorySharedPool);
218+
UMF_RETURN_UR_ERROR(UmfResult);
219+
220+
MemoryDevicePools.push_back(memoryDevicePool);
221+
MemorySharedPools.push_back(memorySharedPool);
222+
}
223+
224+
return UR_RESULT_SUCCESS;
225+
}
193226
};
194227

195228
namespace {

unified-runtime/source/adapters/cuda/device.hpp

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -82,28 +82,9 @@ struct ur_device_handle_t_ {
8282
// CUDA doesn't really have this concept, and could allow almost 100% of
8383
// global memory in one allocation, but is dependent on device usage.
8484
UR_CHECK_ERROR(cuDeviceTotalMem(&MaxAllocSize, cuDevice));
85-
86-
MemoryProviderDevice = nullptr;
87-
MemoryProviderShared = nullptr;
88-
MemoryPoolDevice = nullptr;
89-
MemoryPoolShared = nullptr;
9085
}
9186

92-
~ur_device_handle_t_() {
93-
if (MemoryPoolDevice) {
94-
umfPoolDestroy(MemoryPoolDevice);
95-
}
96-
if (MemoryPoolShared) {
97-
umfPoolDestroy(MemoryPoolShared);
98-
}
99-
if (MemoryProviderDevice) {
100-
umfMemoryProviderDestroy(MemoryProviderDevice);
101-
}
102-
if (MemoryProviderShared) {
103-
umfMemoryProviderDestroy(MemoryProviderShared);
104-
}
105-
cuDevicePrimaryCtxRelease(CuDevice);
106-
}
87+
~ur_device_handle_t_() { cuDevicePrimaryCtxRelease(CuDevice); }
10788

10889
native_type get() const noexcept { return CuDevice; };
10990

@@ -139,16 +120,6 @@ struct ur_device_handle_t_ {
139120

140121
// bookkeeping for mipmappedArray leaks in Mapping external Memory
141122
std::map<CUarray, CUmipmappedArray> ChildCuarrayFromMipmapMap;
142-
143-
// UMF CUDA memory provider and pool for the device memory
144-
// (UMF_MEMORY_TYPE_DEVICE)
145-
umf_memory_provider_handle_t MemoryProviderDevice;
146-
umf_memory_pool_handle_t MemoryPoolDevice;
147-
148-
// UMF CUDA memory provider and pool for the shared memory
149-
// (UMF_MEMORY_TYPE_SHARED)
150-
umf_memory_provider_handle_t MemoryProviderShared;
151-
umf_memory_pool_handle_t MemoryPoolShared;
152123
};
153124

154125
int getAttribute(ur_device_handle_t Device, CUdevice_attribute Attribute);

unified-runtime/source/adapters/cuda/memory.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition(
422422
ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem,
423423
const ur_device_handle_t hDevice) {
424424
ScopedContext Active(hDevice);
425-
auto DeviceIdx = Mem->getContext()->getDeviceIndex(hDevice);
425+
ur_context_handle_t Context = Mem->getContext();
426+
auto DeviceIdx = Context->getDeviceIndex(hDevice);
426427
ur_lock LockGuard(Mem->MemoryAllocationMutex);
427428

428429
if (Mem->isBuffer()) {
@@ -442,7 +443,8 @@ ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem,
442443
CU_MEMHOSTALLOC_DEVICEMAP));
443444
UR_CHECK_ERROR(cuMemHostGetDevicePointer(&DevPtr, Buffer.HostPtr, 0));
444445
} else {
445-
*(void **)&DevPtr = umfPoolMalloc(hDevice->MemoryPoolDevice, Buffer.Size);
446+
*(void **)&DevPtr =
447+
umfPoolMalloc(Context->MemoryDevicePools[DeviceIdx], Buffer.Size);
446448
UMF_CHECK_PTR(*(void **)&DevPtr);
447449
}
448450
} else {

0 commit comments

Comments
 (0)