Skip to content

Commit f2dd498

Browse files
committed
add support for CUDA allocation flags
1 parent ace9f4a commit f2dd498

File tree

8 files changed

+232
-28
lines changed

8 files changed

+232
-28
lines changed

include/umf/providers/provider_cuda.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2024 Intel Corporation
2+
* Copyright (C) 2024-2025 Intel Corporation
33
*
44
* Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
55
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -53,6 +53,13 @@ umf_result_t umfCUDAMemoryProviderParamsSetMemoryType(
5353
umf_cuda_memory_provider_params_handle_t hParams,
5454
umf_usm_memory_type_t memoryType);
5555

56+
/// @brief Set the allocation flags in the parameters struct.
57+
/// @param hParams handle to the parameters of the CUDA Memory Provider.
58+
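/// @param flags valid combination of CUDA allocation flags. For shared memory:
///        CU_MEM_ATTACH_GLOBAL or CU_MEM_ATTACH_HOST (0 selects the default,
///        CU_MEM_ATTACH_GLOBAL). For host memory: any combination of
///        CU_MEMHOSTALLOC_PORTABLE, CU_MEMHOSTALLOC_DEVICEMAP and
///        CU_MEMHOSTALLOC_WRITECOMBINED.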
59+
/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure.
60+
umf_result_t umfCUDAMemoryProviderParamsSetAllocFlags(
61+
umf_cuda_memory_provider_params_handle_t hParams, unsigned int flags);
62+
5663
umf_memory_provider_ops_t *umfCUDAMemoryProviderOps(void);
5764

5865
#ifdef __cplusplus

src/libumf.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ EXPORTS
118118
umfScalablePoolParamsSetGranularity
119119
umfScalablePoolParamsSetKeepAllMemory
120120
; Added in UMF_0.11
121+
umfCUDAMemoryProviderParamsSetAllocFlags
121122
umfFixedMemoryProviderOps
122123
umfFixedMemoryProviderParamsCreate
123124
umfFixedMemoryProviderParamsDestroy

src/libumf.map

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ UMF_0.10 {
116116
};
117117

118118
UMF_0.11 {
119+
umfCUDAMemoryProviderParamsSetAllocFlags;
119120
umfFixedMemoryProviderOps;
120121
umfFixedMemoryProviderParamsCreate;
121122
umfFixedMemoryProviderParamsDestroy;

src/provider/provider_cuda.c

Lines changed: 64 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,14 @@ umf_result_t umfCUDAMemoryProviderParamsSetMemoryType(
5555
return UMF_RESULT_ERROR_NOT_SUPPORTED;
5656
}
5757

58+
umf_result_t umfCUDAMemoryProviderParamsSetAllocFlags(
59+
umf_cuda_memory_provider_params_handle_t hParams, unsigned int flags) {
60+
(void)hParams;
61+
(void)flags;
62+
LOG_ERR("CUDA provider is disabled (UMF_BUILD_CUDA_PROVIDER is OFF)!");
63+
return UMF_RESULT_ERROR_NOT_SUPPORTED;
64+
}
65+
5866
umf_memory_provider_ops_t *umfCUDAMemoryProviderOps(void) {
5967
// not supported
6068
LOG_ERR("CUDA provider is disabled (UMF_BUILD_CUDA_PROVIDER is OFF)!");
@@ -89,13 +97,22 @@ typedef struct cu_memory_provider_t {
8997
CUdevice device;
9098
umf_usm_memory_type_t memory_type;
9199
size_t min_alignment;
100+
unsigned int alloc_flags;
92101
} cu_memory_provider_t;
93102

94103
// CUDA Memory Provider settings struct
95104
typedef struct umf_cuda_memory_provider_params_t {
96-
void *cuda_context_handle; ///< Handle to the CUDA context
97-
int cuda_device_handle; ///< Handle to the CUDA device
98-
umf_usm_memory_type_t memory_type; ///< Allocation memory type
105+
// Handle to the CUDA context
106+
void *cuda_context_handle;
107+
108+
// Handle to the CUDA device
109+
int cuda_device_handle;
110+
111+
// Allocation memory type
112+
umf_usm_memory_type_t memory_type;
113+
114+
// Allocation flags for cuMemHostAlloc/cuMemAllocManaged
115+
unsigned int alloc_flags;
99116
} umf_cuda_memory_provider_params_t;
100117

101118
typedef struct cu_ops_t {
@@ -104,6 +121,7 @@ typedef struct cu_ops_t {
104121
CUmemAllocationGranularity_flags option);
105122
CUresult (*cuMemAlloc)(CUdeviceptr *dptr, size_t bytesize);
106123
CUresult (*cuMemAllocHost)(void **pp, size_t bytesize);
124+
CUresult (*cuMemHostAlloc)(void **pp, size_t bytesize, unsigned int flags);
107125
CUresult (*cuMemAllocManaged)(CUdeviceptr *dptr, size_t bytesize,
108126
unsigned int flags);
109127
CUresult (*cuMemFree)(CUdeviceptr dptr);
@@ -174,6 +192,8 @@ static void init_cu_global_state(void) {
174192
utils_get_symbol_addr(0, "cuMemAlloc_v2", lib_name);
175193
*(void **)&g_cu_ops.cuMemAllocHost =
176194
utils_get_symbol_addr(0, "cuMemAllocHost_v2", lib_name);
195+
*(void **)&g_cu_ops.cuMemHostAlloc =
196+
utils_get_symbol_addr(0, "cuMemHostAlloc", lib_name);
177197
*(void **)&g_cu_ops.cuMemAllocManaged =
178198
utils_get_symbol_addr(0, "cuMemAllocManaged", lib_name);
179199
*(void **)&g_cu_ops.cuMemFree =
@@ -196,12 +216,12 @@ static void init_cu_global_state(void) {
196216
utils_get_symbol_addr(0, "cuIpcCloseMemHandle", lib_name);
197217

198218
if (!g_cu_ops.cuMemGetAllocationGranularity || !g_cu_ops.cuMemAlloc ||
199-
!g_cu_ops.cuMemAllocHost || !g_cu_ops.cuMemAllocManaged ||
200-
!g_cu_ops.cuMemFree || !g_cu_ops.cuMemFreeHost ||
201-
!g_cu_ops.cuGetErrorName || !g_cu_ops.cuGetErrorString ||
202-
!g_cu_ops.cuCtxGetCurrent || !g_cu_ops.cuCtxSetCurrent ||
203-
!g_cu_ops.cuIpcGetMemHandle || !g_cu_ops.cuIpcOpenMemHandle ||
204-
!g_cu_ops.cuIpcCloseMemHandle) {
219+
!g_cu_ops.cuMemAllocHost || !g_cu_ops.cuMemHostAlloc ||
220+
!g_cu_ops.cuMemAllocManaged || !g_cu_ops.cuMemFree ||
221+
!g_cu_ops.cuMemFreeHost || !g_cu_ops.cuGetErrorName ||
222+
!g_cu_ops.cuGetErrorString || !g_cu_ops.cuCtxGetCurrent ||
223+
!g_cu_ops.cuCtxSetCurrent || !g_cu_ops.cuIpcGetMemHandle ||
224+
!g_cu_ops.cuIpcOpenMemHandle || !g_cu_ops.cuIpcCloseMemHandle) {
205225
LOG_ERR("Required CUDA symbols not found.");
206226
Init_cu_global_state_failed = true;
207227
}
@@ -225,6 +245,7 @@ umf_result_t umfCUDAMemoryProviderParamsCreate(
225245
params_data->cuda_context_handle = NULL;
226246
params_data->cuda_device_handle = -1;
227247
params_data->memory_type = UMF_MEMORY_TYPE_UNKNOWN;
248+
params_data->alloc_flags = 0;
228249

229250
*hParams = params_data;
230251

@@ -275,6 +296,18 @@ umf_result_t umfCUDAMemoryProviderParamsSetMemoryType(
275296
return UMF_RESULT_SUCCESS;
276297
}
277298

299+
umf_result_t umfCUDAMemoryProviderParamsSetAllocFlags(
300+
umf_cuda_memory_provider_params_handle_t hParams, unsigned int flags) {
301+
if (!hParams) {
302+
LOG_ERR("CUDA Memory Provider params handle is NULL");
303+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
304+
}
305+
306+
hParams->alloc_flags = flags;
307+
308+
return UMF_RESULT_SUCCESS;
309+
}
310+
278311
static umf_result_t cu_memory_provider_initialize(void *params,
279312
void **provider) {
280313
if (params == NULL) {
@@ -294,6 +327,24 @@ static umf_result_t cu_memory_provider_initialize(void *params,
294327
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
295328
}
296329

330+
if (cu_params->memory_type == UMF_MEMORY_TYPE_SHARED) {
331+
if (cu_params->alloc_flags == 0) {
332+
// if flags are not set, the default setting is CU_MEM_ATTACH_GLOBAL
333+
cu_params->alloc_flags = CU_MEM_ATTACH_GLOBAL;
334+
} else if (cu_params->alloc_flags != CU_MEM_ATTACH_GLOBAL &&
335+
cu_params->alloc_flags != CU_MEM_ATTACH_HOST) {
336+
LOG_ERR("Invalid shared allocation flags");
337+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
338+
}
339+
} else if (cu_params->memory_type == UMF_MEMORY_TYPE_HOST) {
340+
if (cu_params->alloc_flags &
341+
~(CU_MEMHOSTALLOC_PORTABLE | CU_MEMHOSTALLOC_DEVICEMAP |
342+
CU_MEMHOSTALLOC_WRITECOMBINED)) {
343+
LOG_ERR("Invalid host allocation flags");
344+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
345+
}
346+
}
347+
297348
utils_init_once(&cu_is_initialized, init_cu_global_state);
298349
if (Init_cu_global_state_failed) {
299350
LOG_ERR("Loading CUDA symbols failed");
@@ -324,6 +375,7 @@ static umf_result_t cu_memory_provider_initialize(void *params,
324375
cu_provider->device = cu_params->cuda_device_handle;
325376
cu_provider->memory_type = cu_params->memory_type;
326377
cu_provider->min_alignment = min_alignment;
378+
cu_provider->alloc_flags = cu_params->alloc_flags;
327379

328380
*provider = cu_provider;
329381

@@ -381,7 +433,8 @@ static umf_result_t cu_memory_provider_alloc(void *provider, size_t size,
381433
CUresult cu_result = CUDA_SUCCESS;
382434
switch (cu_provider->memory_type) {
383435
case UMF_MEMORY_TYPE_HOST: {
384-
cu_result = g_cu_ops.cuMemAllocHost(resultPtr, size);
436+
cu_result =
437+
g_cu_ops.cuMemHostAlloc(resultPtr, size, cu_provider->alloc_flags);
385438
break;
386439
}
387440
case UMF_MEMORY_TYPE_DEVICE: {
@@ -390,7 +443,7 @@ static umf_result_t cu_memory_provider_alloc(void *provider, size_t size,
390443
}
391444
case UMF_MEMORY_TYPE_SHARED: {
392445
cu_result = g_cu_ops.cuMemAllocManaged((CUdeviceptr *)resultPtr, size,
393-
CU_MEM_ATTACH_GLOBAL);
446+
cu_provider->alloc_flags);
394447
break;
395448
}
396449
default:

test/providers/cuda_helpers.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ struct libcu_ops {
2323
CUresult (*cuMemAlloc)(CUdeviceptr *dptr, size_t size);
2424
CUresult (*cuMemFree)(CUdeviceptr dptr);
2525
CUresult (*cuMemAllocHost)(void **pp, size_t size);
26+
CUresult (*cuMemHostAlloc)(void **pp, size_t size, unsigned int flags);
2627
CUresult (*cuMemAllocManaged)(CUdeviceptr *dptr, size_t bytesize,
2728
unsigned int flags);
2829
CUresult (*cuMemFreeHost)(void *p);
@@ -34,6 +35,7 @@ struct libcu_ops {
3435
CUresult (*cuPointerGetAttributes)(unsigned int numAttributes,
3536
CUpointer_attribute *attributes,
3637
void **data, CUdeviceptr ptr);
38+
CUresult (*cuMemHostGetFlags)(unsigned int *pFlags, void *p);
3739
CUresult (*cuStreamSynchronize)(CUstream hStream);
3840
CUresult (*cuCtxSynchronize)(void);
3941
} libcu_ops;
@@ -72,6 +74,9 @@ struct DlHandleCloser {
7274
libcu_ops.cuMemAllocHost = [](auto... args) {
7375
return noop_stub(args...);
7476
};
77+
libcu_ops.cuMemHostAlloc = [](auto... args) {
78+
return noop_stub(args...);
79+
};
7580
libcu_ops.cuMemAllocManaged = [](auto... args) {
7681
return noop_stub(args...);
7782
};
@@ -90,6 +95,9 @@ struct DlHandleCloser {
9095
libcu_ops.cuPointerGetAttributes = [](auto... args) {
9196
return noop_stub(args...);
9297
};
98+
libcu_ops.cuMemHostGetFlags = [](auto... args) {
99+
return noop_stub(args...);
100+
};
93101
libcu_ops.cuStreamSynchronize = [](auto... args) {
94102
return noop_stub(args...);
95103
};
@@ -170,6 +178,12 @@ int InitCUDAOps() {
170178
fprintf(stderr, "cuMemAllocHost_v2 symbol not found in %s\n", lib_name);
171179
return -1;
172180
}
181+
*(void **)&libcu_ops.cuMemHostAlloc =
182+
utils_get_symbol_addr(cuDlHandle.get(), "cuMemHostAlloc", lib_name);
183+
if (libcu_ops.cuMemHostAlloc == nullptr) {
184+
fprintf(stderr, "cuMemHostAlloc symbol not found in %s\n", lib_name);
185+
return -1;
186+
}
173187
*(void **)&libcu_ops.cuMemAllocManaged =
174188
utils_get_symbol_addr(cuDlHandle.get(), "cuMemAllocManaged", lib_name);
175189
if (libcu_ops.cuMemAllocManaged == nullptr) {
@@ -208,6 +222,12 @@ int InitCUDAOps() {
208222
lib_name);
209223
return -1;
210224
}
225+
*(void **)&libcu_ops.cuMemHostGetFlags =
226+
utils_get_symbol_addr(cuDlHandle.get(), "cuMemHostGetFlags", lib_name);
227+
if (libcu_ops.cuMemHostGetFlags == nullptr) {
228+
fprintf(stderr, "cuMemHostGetFlags symbol not found in %s\n", lib_name);
229+
return -1;
230+
}
211231
*(void **)&libcu_ops.cuStreamSynchronize = utils_get_symbol_addr(
212232
cuDlHandle.get(), "cuStreamSynchronize", lib_name);
213233
if (libcu_ops.cuStreamSynchronize == nullptr) {
@@ -237,13 +257,15 @@ int InitCUDAOps() {
237257
libcu_ops.cuDeviceGet = cuDeviceGet;
238258
libcu_ops.cuMemAlloc = cuMemAlloc;
239259
libcu_ops.cuMemAllocHost = cuMemAllocHost;
260+
libcu_ops.cuMemHostAlloc = cuMemHostAlloc;
240261
libcu_ops.cuMemAllocManaged = cuMemAllocManaged;
241262
libcu_ops.cuMemFree = cuMemFree;
242263
libcu_ops.cuMemFreeHost = cuMemFreeHost;
243264
libcu_ops.cuMemsetD32 = cuMemsetD32;
244265
libcu_ops.cuMemcpy = cuMemcpy;
245266
libcu_ops.cuPointerGetAttribute = cuPointerGetAttribute;
246267
libcu_ops.cuPointerGetAttributes = cuPointerGetAttributes;
268+
libcu_ops.cuMemHostGetFlags = cuMemHostGetFlags;
247269
libcu_ops.cuStreamSynchronize = cuStreamSynchronize;
248270
libcu_ops.cuCtxSynchronize = cuCtxSynchronize;
249271

@@ -373,6 +395,17 @@ umf_usm_memory_type_t get_mem_type(CUcontext context, void *ptr) {
373395
return UMF_MEMORY_TYPE_UNKNOWN;
374396
}
375397

398+
unsigned int get_mem_host_alloc_flags(void *ptr) {
399+
unsigned int flags;
400+
CUresult res = libcu_ops.cuMemHostGetFlags(&flags, ptr);
401+
if (res != CUDA_SUCCESS) {
402+
fprintf(stderr, "cuPointerGetAttribute() failed!\n");
403+
return 0;
404+
}
405+
406+
return flags;
407+
}
408+
376409
CUcontext get_mem_context(void *ptr) {
377410
CUcontext context;
378411
CUresult res = libcu_ops.cuPointerGetAttribute(

test/providers/cuda_helpers.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ int cuda_copy(CUcontext context, CUdevice device, void *dst_ptr,
4242

4343
umf_usm_memory_type_t get_mem_type(CUcontext context, void *ptr);
4444

45+
unsigned int get_mem_host_alloc_flags(void *ptr);
46+
4547
CUcontext get_mem_context(void *ptr);
4648

4749
CUcontext get_current_context();

0 commit comments

Comments
 (0)