Skip to content

Commit 0bbce23

Browse files
committed
add support for CUDA allocation flags
1 parent 1fa3f8a commit 0bbce23

File tree

8 files changed

+160
-16
lines changed

8 files changed

+160
-16
lines changed

include/umf/providers/provider_cuda.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2024 Intel Corporation
2+
* Copyright (C) 2024-2025 Intel Corporation
33
*
44
* Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
55
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -53,6 +53,13 @@ umf_result_t umfCUDAMemoryProviderParamsSetMemoryType(
5353
umf_cuda_memory_provider_params_handle_t hParams,
5454
umf_usm_memory_type_t memoryType);
5555

56+
/// @brief Set the allocation flags in the parameters struct.
57+
/// @param hParams handle to the parameters of the CUDA Memory Provider.
58+
/// @param flags valid combination of CUDA allocation flags; they are passed to cuMemHostAlloc (host memory) or cuMemAllocManaged (shared memory).
59+
/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure.
60+
umf_result_t umfCUDAMemoryProviderParamsSetAllocFlags(
61+
umf_cuda_memory_provider_params_handle_t hParams, unsigned int flags);
62+
5663
umf_memory_provider_ops_t *umfCUDAMemoryProviderOps(void);
5764

5865
#ifdef __cplusplus

src/libumf.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ EXPORTS
118118
umfScalablePoolParamsSetGranularity
119119
umfScalablePoolParamsSetKeepAllMemory
120120
; Added in UMF_0.11
121+
umfCUDAMemoryProviderParamsSetAllocFlags
121122
umfFixedMemoryProviderOps
122123
umfFixedMemoryProviderParamsCreate
123124
umfFixedMemoryProviderParamsDestroy

src/libumf.map

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ UMF_0.10 {
116116
};
117117

118118
UMF_0.11 {
119+
umfCUDAMemoryProviderParamsSetAllocFlags;
119120
umfFixedMemoryProviderOps;
120121
umfFixedMemoryProviderParamsCreate;
121122
umfFixedMemoryProviderParamsDestroy;

src/provider/provider_cuda.c

Lines changed: 64 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,14 @@ umf_result_t umfCUDAMemoryProviderParamsSetMemoryType(
5555
return UMF_RESULT_ERROR_NOT_SUPPORTED;
5656
}
5757

58+
umf_result_t umfCUDAMemoryProviderParamsSetAllocFlags(
59+
umf_cuda_memory_provider_params_handle_t hParams, unsigned int flags) {
60+
(void)hParams;
61+
(void)flags;
62+
LOG_ERR("CUDA provider is disabled (UMF_BUILD_CUDA_PROVIDER is OFF)!");
63+
return UMF_RESULT_ERROR_NOT_SUPPORTED;
64+
}
65+
5866
umf_memory_provider_ops_t *umfCUDAMemoryProviderOps(void) {
5967
// not supported
6068
LOG_ERR("CUDA provider is disabled (UMF_BUILD_CUDA_PROVIDER is OFF)!");
@@ -89,13 +97,22 @@ typedef struct cu_memory_provider_t {
8997
CUdevice device;
9098
umf_usm_memory_type_t memory_type;
9199
size_t min_alignment;
100+
unsigned int alloc_flags;
92101
} cu_memory_provider_t;
93102

94103
// CUDA Memory Provider settings struct
95104
typedef struct umf_cuda_memory_provider_params_t {
96-
void *cuda_context_handle; ///< Handle to the CUDA context
97-
int cuda_device_handle; ///< Handle to the CUDA device
98-
umf_usm_memory_type_t memory_type; ///< Allocation memory type
105+
// Handle to the CUDA context
106+
void *cuda_context_handle;
107+
108+
// Handle to the CUDA device
109+
int cuda_device_handle;
110+
111+
// Allocation memory type
112+
umf_usm_memory_type_t memory_type;
113+
114+
// Allocation flags for cuMemHostAlloc/cuMemAllocManaged
115+
unsigned int alloc_flags;
99116
} umf_cuda_memory_provider_params_t;
100117

101118
typedef struct cu_ops_t {
@@ -104,6 +121,7 @@ typedef struct cu_ops_t {
104121
CUmemAllocationGranularity_flags option);
105122
CUresult (*cuMemAlloc)(CUdeviceptr *dptr, size_t bytesize);
106123
CUresult (*cuMemAllocHost)(void **pp, size_t bytesize);
124+
CUresult (*cuMemHostAlloc)(void **pp, size_t bytesize, unsigned int flags);
107125
CUresult (*cuMemAllocManaged)(CUdeviceptr *dptr, size_t bytesize,
108126
unsigned int flags);
109127
CUresult (*cuMemFree)(CUdeviceptr dptr);
@@ -175,6 +193,8 @@ static void init_cu_global_state(void) {
175193
utils_get_symbol_addr(0, "cuMemAlloc_v2", lib_name);
176194
*(void **)&g_cu_ops.cuMemAllocHost =
177195
utils_get_symbol_addr(0, "cuMemAllocHost_v2", lib_name);
196+
*(void **)&g_cu_ops.cuMemHostAlloc =
197+
utils_get_symbol_addr(0, "cuMemHostAlloc", lib_name);
178198
*(void **)&g_cu_ops.cuMemAllocManaged =
179199
utils_get_symbol_addr(0, "cuMemAllocManaged", lib_name);
180200
*(void **)&g_cu_ops.cuMemFree =
@@ -197,12 +217,12 @@ static void init_cu_global_state(void) {
197217
utils_get_symbol_addr(0, "cuIpcCloseMemHandle", lib_name);
198218

199219
if (!g_cu_ops.cuMemGetAllocationGranularity || !g_cu_ops.cuMemAlloc ||
200-
!g_cu_ops.cuMemAllocHost || !g_cu_ops.cuMemAllocManaged ||
201-
!g_cu_ops.cuMemFree || !g_cu_ops.cuMemFreeHost ||
202-
!g_cu_ops.cuGetErrorName || !g_cu_ops.cuGetErrorString ||
203-
!g_cu_ops.cuCtxGetCurrent || !g_cu_ops.cuCtxSetCurrent ||
204-
!g_cu_ops.cuIpcGetMemHandle || !g_cu_ops.cuIpcOpenMemHandle ||
205-
!g_cu_ops.cuIpcCloseMemHandle) {
220+
!g_cu_ops.cuMemAllocHost || !g_cu_ops.cuMemHostAlloc ||
221+
!g_cu_ops.cuMemAllocManaged || !g_cu_ops.cuMemFree ||
222+
!g_cu_ops.cuMemFreeHost || !g_cu_ops.cuGetErrorName ||
223+
!g_cu_ops.cuGetErrorString || !g_cu_ops.cuCtxGetCurrent ||
224+
!g_cu_ops.cuCtxSetCurrent || !g_cu_ops.cuIpcGetMemHandle ||
225+
!g_cu_ops.cuIpcOpenMemHandle || !g_cu_ops.cuIpcCloseMemHandle) {
206226
LOG_ERR("Required CUDA symbols not found.");
207227
Init_cu_global_state_failed = true;
208228
}
@@ -226,6 +246,7 @@ umf_result_t umfCUDAMemoryProviderParamsCreate(
226246
params_data->cuda_context_handle = NULL;
227247
params_data->cuda_device_handle = -1;
228248
params_data->memory_type = UMF_MEMORY_TYPE_UNKNOWN;
249+
params_data->alloc_flags = 0;
229250

230251
*hParams = params_data;
231252

@@ -276,6 +297,18 @@ umf_result_t umfCUDAMemoryProviderParamsSetMemoryType(
276297
return UMF_RESULT_SUCCESS;
277298
}
278299

300+
umf_result_t umfCUDAMemoryProviderParamsSetAllocFlags(
301+
umf_cuda_memory_provider_params_handle_t hParams, unsigned int flags) {
302+
if (!hParams) {
303+
LOG_ERR("CUDA Memory Provider params handle is NULL");
304+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
305+
}
306+
307+
hParams->alloc_flags = flags;
308+
309+
return UMF_RESULT_SUCCESS;
310+
}
311+
279312
static umf_result_t cu_memory_provider_initialize(void *params,
280313
void **provider) {
281314
if (params == NULL) {
@@ -295,6 +328,24 @@ static umf_result_t cu_memory_provider_initialize(void *params,
295328
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
296329
}
297330

331+
if (cu_params->memory_type == UMF_MEMORY_TYPE_SHARED) {
332+
if (cu_params->alloc_flags == 0) {
333+
// if flags are not set, the default setting is CU_MEM_ATTACH_GLOBAL
334+
cu_params->alloc_flags = CU_MEM_ATTACH_GLOBAL;
335+
} else if (cu_params->alloc_flags != CU_MEM_ATTACH_GLOBAL &&
336+
cu_params->alloc_flags != CU_MEM_ATTACH_HOST) {
337+
LOG_ERR("Invalid shared allocation flags");
338+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
339+
}
340+
} else if (cu_params->memory_type == UMF_MEMORY_TYPE_HOST) {
341+
if (cu_params->alloc_flags &
342+
~(CU_MEMHOSTALLOC_PORTABLE | CU_MEMHOSTALLOC_DEVICEMAP |
343+
CU_MEMHOSTALLOC_WRITECOMBINED)) {
344+
LOG_ERR("Invalid host allocation flags");
345+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
346+
}
347+
}
348+
298349
utils_init_once(&cu_is_initialized, init_cu_global_state);
299350
if (Init_cu_global_state_failed) {
300351
LOG_ERR("Loading CUDA symbols failed");
@@ -325,6 +376,7 @@ static umf_result_t cu_memory_provider_initialize(void *params,
325376
cu_provider->device = cu_params->cuda_device_handle;
326377
cu_provider->memory_type = cu_params->memory_type;
327378
cu_provider->min_alignment = min_alignment;
379+
cu_provider->alloc_flags = cu_params->alloc_flags;
328380

329381
*provider = cu_provider;
330382

@@ -382,7 +434,8 @@ static umf_result_t cu_memory_provider_alloc(void *provider, size_t size,
382434
CUresult cu_result = CUDA_SUCCESS;
383435
switch (cu_provider->memory_type) {
384436
case UMF_MEMORY_TYPE_HOST: {
385-
cu_result = g_cu_ops.cuMemAllocHost(resultPtr, size);
437+
cu_result =
438+
g_cu_ops.cuMemHostAlloc(resultPtr, size, cu_provider->alloc_flags);
386439
break;
387440
}
388441
case UMF_MEMORY_TYPE_DEVICE: {
@@ -391,7 +444,7 @@ static umf_result_t cu_memory_provider_alloc(void *provider, size_t size,
391444
}
392445
case UMF_MEMORY_TYPE_SHARED: {
393446
cu_result = g_cu_ops.cuMemAllocManaged((CUdeviceptr *)resultPtr, size,
394-
CU_MEM_ATTACH_GLOBAL);
447+
cu_provider->alloc_flags);
395448
break;
396449
}
397450
default:

test/providers/cuda_helpers.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ struct libcu_ops {
2323
CUresult (*cuMemAlloc)(CUdeviceptr *dptr, size_t size);
2424
CUresult (*cuMemFree)(CUdeviceptr dptr);
2525
CUresult (*cuMemAllocHost)(void **pp, size_t size);
26+
CUresult (*cuMemHostAlloc)(void **pp, size_t size, unsigned int flags);
2627
CUresult (*cuMemAllocManaged)(CUdeviceptr *dptr, size_t bytesize,
2728
unsigned int flags);
2829
CUresult (*cuMemFreeHost)(void *p);
@@ -34,6 +35,7 @@ struct libcu_ops {
3435
CUresult (*cuPointerGetAttributes)(unsigned int numAttributes,
3536
CUpointer_attribute *attributes,
3637
void **data, CUdeviceptr ptr);
38+
CUresult (*cuMemHostGetFlags)(unsigned int *pFlags, void *p);
3739
CUresult (*cuStreamSynchronize)(CUstream hStream);
3840
CUresult (*cuCtxSynchronize)(void);
3941
} libcu_ops;
@@ -72,6 +74,9 @@ struct DlHandleCloser {
7274
libcu_ops.cuMemAllocHost = [](auto... args) {
7375
return noop_stub(args...);
7476
};
77+
libcu_ops.cuMemHostAlloc = [](auto... args) {
78+
return noop_stub(args...);
79+
};
7580
libcu_ops.cuMemAllocManaged = [](auto... args) {
7681
return noop_stub(args...);
7782
};
@@ -90,6 +95,9 @@ struct DlHandleCloser {
9095
libcu_ops.cuPointerGetAttributes = [](auto... args) {
9196
return noop_stub(args...);
9297
};
98+
libcu_ops.cuMemHostGetFlags = [](auto... args) {
99+
return noop_stub(args...);
100+
};
93101
libcu_ops.cuStreamSynchronize = [](auto... args) {
94102
return noop_stub(args...);
95103
};
@@ -170,6 +178,12 @@ int InitCUDAOps() {
170178
fprintf(stderr, "cuMemAllocHost_v2 symbol not found in %s\n", lib_name);
171179
return -1;
172180
}
181+
*(void **)&libcu_ops.cuMemHostAlloc =
182+
utils_get_symbol_addr(cuDlHandle.get(), "cuMemHostAlloc", lib_name);
183+
if (libcu_ops.cuMemHostAlloc == nullptr) {
184+
fprintf(stderr, "cuMemHostAlloc symbol not found in %s\n", lib_name);
185+
return -1;
186+
}
173187
*(void **)&libcu_ops.cuMemAllocManaged =
174188
utils_get_symbol_addr(cuDlHandle.get(), "cuMemAllocManaged", lib_name);
175189
if (libcu_ops.cuMemAllocManaged == nullptr) {
@@ -208,6 +222,12 @@ int InitCUDAOps() {
208222
lib_name);
209223
return -1;
210224
}
225+
*(void **)&libcu_ops.cuMemHostGetFlags =
226+
utils_get_symbol_addr(cuDlHandle.get(), "cuMemHostGetFlags", lib_name);
227+
if (libcu_ops.cuMemHostGetFlags == nullptr) {
228+
fprintf(stderr, "cuMemHostGetFlags symbol not found in %s\n", lib_name);
229+
return -1;
230+
}
211231
*(void **)&libcu_ops.cuStreamSynchronize = utils_get_symbol_addr(
212232
cuDlHandle.get(), "cuStreamSynchronize", lib_name);
213233
if (libcu_ops.cuStreamSynchronize == nullptr) {
@@ -237,13 +257,15 @@ int InitCUDAOps() {
237257
libcu_ops.cuDeviceGet = cuDeviceGet;
238258
libcu_ops.cuMemAlloc = cuMemAlloc;
239259
libcu_ops.cuMemAllocHost = cuMemAllocHost;
260+
libcu_ops.cuMemHostAlloc = cuMemHostAlloc;
240261
libcu_ops.cuMemAllocManaged = cuMemAllocManaged;
241262
libcu_ops.cuMemFree = cuMemFree;
242263
libcu_ops.cuMemFreeHost = cuMemFreeHost;
243264
libcu_ops.cuMemsetD32 = cuMemsetD32;
244265
libcu_ops.cuMemcpy = cuMemcpy;
245266
libcu_ops.cuPointerGetAttribute = cuPointerGetAttribute;
246267
libcu_ops.cuPointerGetAttributes = cuPointerGetAttributes;
268+
libcu_ops.cuMemHostGetFlags = cuMemHostGetFlags;
247269
libcu_ops.cuStreamSynchronize = cuStreamSynchronize;
248270
libcu_ops.cuCtxSynchronize = cuCtxSynchronize;
249271

@@ -373,6 +395,17 @@ umf_usm_memory_type_t get_mem_type(CUcontext context, void *ptr) {
373395
return UMF_MEMORY_TYPE_UNKNOWN;
374396
}
375397

398+
unsigned int get_mem_host_alloc_flags(void *ptr) {
399+
unsigned int flags;
400+
CUresult res = libcu_ops.cuMemHostGetFlags(&flags, ptr);
401+
if (res != CUDA_SUCCESS) {
402+
fprintf(stderr, "cuPointerGetAttribute() failed!\n");
403+
return 0;
404+
}
405+
406+
return flags;
407+
}
408+
376409
CUcontext get_mem_context(void *ptr) {
377410
CUcontext context;
378411
CUresult res = libcu_ops.cuPointerGetAttribute(

test/providers/cuda_helpers.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ int cuda_copy(CUcontext context, CUdevice device, void *dst_ptr,
4242

4343
umf_usm_memory_type_t get_mem_type(CUcontext context, void *ptr);
4444

45+
unsigned int get_mem_host_alloc_flags(void *ptr);
46+
4547
CUcontext get_mem_context(void *ptr);
4648

4749
CUcontext get_current_context();

0 commit comments

Comments
 (0)