Skip to content

Commit db41224

Browse files
committed
[UR] Add urUSMContextMemcpyExp API and basic l0 implementation.
1 parent 194ec74 commit db41224

34 files changed

+532
-6
lines changed

unified-runtime/include/ur_api.h

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,8 @@ typedef enum ur_function_t {
453453
UR_FUNCTION_USM_POOL_TRIM_TO_EXP = 261,
454454
/// Enumerator for ::urUSMPoolGetInfoExp
455455
UR_FUNCTION_USM_POOL_GET_INFO_EXP = 262,
456+
/// Enumerator for ::urUSMContextMemcpyExp
457+
UR_FUNCTION_USM_CONTEXT_MEMCPY_EXP = 264,
456458
/// @cond
457459
UR_FUNCTION_FORCE_UINT32 = 0x7fffffff
458460
/// @endcond
@@ -2320,6 +2322,8 @@ typedef enum ur_device_info_t {
23202322
/// [::ur_bool_t] returns true if the device supports enqueueing of
23212323
/// allocations and frees.
23222324
UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_EXP = 0x2050,
2325+
/// [::ur_bool_t] returns true if the device supports ::urUSMContextMemcpyExp
2326+
UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP = 0x7000,
23232327
/// @cond
23242328
UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff
23252329
/// @endcond
@@ -2345,7 +2349,7 @@ typedef enum ur_device_info_t {
23452349
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
23462350
/// + `NULL == hDevice`
23472351
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
2348-
/// + `::UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_EXP < propName`
2352+
/// + `::UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP < propName`
23492353
/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION
23502354
/// + If `propName` is not supported by the adapter.
23512355
/// - ::UR_RESULT_ERROR_INVALID_SIZE
@@ -12153,6 +12157,39 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp(
1215312157
/// [out][alloc] pointer to handle of program object created.
1215412158
ur_program_handle_t *phProgram);
1215512159

12160+
#if !defined(__GNUC__)
12161+
#pragma endregion
12162+
#endif
12163+
// Intel 'oneAPI' Unified Runtime Experimental APIs for USM Context Memcpy
12164+
#if !defined(__GNUC__)
12165+
#pragma region usm_context_memcpy_(experimental)
12166+
#endif
12167+
///////////////////////////////////////////////////////////////////////////////
12168+
/// @brief Perform a synchronous, blocking memcpy operation between USM
/// allocations.
12169+
///
12170+
/// @returns
12171+
/// - ::UR_RESULT_SUCCESS
12172+
/// - ::UR_RESULT_ERROR_UNINITIALIZED
12173+
/// - ::UR_RESULT_ERROR_DEVICE_LOST
12174+
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
12175+
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
12176+
/// + `NULL == hContext`
12177+
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
12178+
/// + `NULL == pDst`
12179+
/// + `NULL == pSrc`
12180+
/// - ::UR_RESULT_SUCCESS
12181+
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
12182+
UR_APIEXPORT ur_result_t UR_APICALL urUSMContextMemcpyExp(
12183+
/// [in] Context associated with the device(s) that own the allocations
12184+
/// `pSrc` and `pDst`.
12185+
ur_context_handle_t hContext,
12186+
/// [in] Destination pointer to copy to.
12187+
void *pDst,
12188+
/// [in] Source pointer to copy from.
12189+
const void *pSrc,
12190+
/// [in] Size in bytes to be copied.
12191+
size_t size);
12192+
1215612193
#if !defined(__GNUC__)
1215712194
#pragma endregion
1215812195
#endif
@@ -14282,6 +14319,17 @@ typedef struct ur_usm_pitched_alloc_exp_params_t {
1428214319
size_t **ppResultPitch;
1428314320
} ur_usm_pitched_alloc_exp_params_t;
1428414321

14322+
///////////////////////////////////////////////////////////////////////////////
14323+
/// @brief Function parameters for urUSMContextMemcpyExp
14324+
/// @details Each entry is a pointer to the parameter passed to the function;
14325+
/// allowing the callback the ability to modify the parameter's value
14326+
typedef struct ur_usm_context_memcpy_exp_params_t {
14327+
/// Pointer to the `hContext` argument of urUSMContextMemcpyExp.
ur_context_handle_t *phContext;
14328+
/// Pointer to the `pDst` (destination pointer) argument.
void **ppDst;
14329+
/// Pointer to the `pSrc` (source pointer) argument.
const void **ppSrc;
14330+
/// Pointer to the `size` (byte count) argument.
size_t *psize;
14331+
} ur_usm_context_memcpy_exp_params_t;
14332+
1428514333
///////////////////////////////////////////////////////////////////////////////
1428614334
/// @brief Function parameters for urUSMImportExp
1428714335
/// @details Each entry is a pointer to the parameter passed to the function;

unified-runtime/include/ur_api_funcs.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ _UR_API(urUSMPoolSetDevicePoolExp)
157157
_UR_API(urUSMPoolGetDevicePoolExp)
158158
_UR_API(urUSMPoolTrimToExp)
159159
_UR_API(urUSMPitchedAllocExp)
160+
_UR_API(urUSMContextMemcpyExp)
160161
_UR_API(urUSMImportExp)
161162
_UR_API(urUSMReleaseExp)
162163
_UR_API(urBindlessImagesUnsampledImageHandleDestroyExp)

unified-runtime/include/ur_ddi.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,6 +1338,11 @@ typedef ur_result_t(UR_APICALL *ur_pfnUSMPitchedAllocExp_t)(
13381338
ur_context_handle_t, ur_device_handle_t, const ur_usm_desc_t *,
13391339
ur_usm_pool_handle_t, size_t, size_t, size_t, void **, size_t *);
13401340

1341+
///////////////////////////////////////////////////////////////////////////////
1342+
/// @brief Function-pointer for urUSMContextMemcpyExp
1343+
typedef ur_result_t(UR_APICALL *ur_pfnUSMContextMemcpyExp_t)(
1344+
ur_context_handle_t, void *, const void *, size_t);
1345+
13411346
///////////////////////////////////////////////////////////////////////////////
13421347
/// @brief Function-pointer for urUSMImportExp
13431348
typedef ur_result_t(UR_APICALL *ur_pfnUSMImportExp_t)(ur_context_handle_t,
@@ -1360,6 +1365,7 @@ typedef struct ur_usm_exp_dditable_t {
13601365
ur_pfnUSMPoolGetDevicePoolExp_t pfnPoolGetDevicePoolExp;
13611366
ur_pfnUSMPoolTrimToExp_t pfnPoolTrimToExp;
13621367
ur_pfnUSMPitchedAllocExp_t pfnPitchedAllocExp;
1368+
ur_pfnUSMContextMemcpyExp_t pfnContextMemcpyExp;
13631369
ur_pfnUSMImportExp_t pfnImportExp;
13641370
ur_pfnUSMReleaseExp_t pfnReleaseExp;
13651371
} ur_usm_exp_dditable_t;

unified-runtime/include/ur_print.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2897,6 +2897,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPitchedAllocExpParams(
28972897
const struct ur_usm_pitched_alloc_exp_params_t *params, char *buffer,
28982898
const size_t buff_size, size_t *out_size);
28992899

2900+
///////////////////////////////////////////////////////////////////////////////
2901+
/// @brief Print ur_usm_context_memcpy_exp_params_t struct
2902+
/// @returns
2903+
/// - ::UR_RESULT_SUCCESS
2904+
/// - ::UR_RESULT_ERROR_INVALID_SIZE
2905+
/// - `buff_size < out_size`
2906+
UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmContextMemcpyExpParams(
2907+
const struct ur_usm_context_memcpy_exp_params_t *params, char *buffer,
2908+
const size_t buff_size, size_t *out_size);
2909+
29002910
///////////////////////////////////////////////////////////////////////////////
29012911
/// @brief Print ur_usm_import_exp_params_t struct
29022912
/// @returns

unified-runtime/include/ur_print.hpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,6 +1222,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) {
12221222
case UR_FUNCTION_USM_POOL_GET_INFO_EXP:
12231223
os << "UR_FUNCTION_USM_POOL_GET_INFO_EXP";
12241224
break;
1225+
case UR_FUNCTION_USM_CONTEXT_MEMCPY_EXP:
1226+
os << "UR_FUNCTION_USM_CONTEXT_MEMCPY_EXP";
1227+
break;
12251228
default:
12261229
os << "unknown enumerator";
12271230
break;
@@ -3032,6 +3035,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
30323035
case UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_EXP:
30333036
os << "UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_EXP";
30343037
break;
3038+
case UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP:
3039+
os << "UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP";
3040+
break;
30353041
default:
30363042
os << "unknown enumerator";
30373043
break;
@@ -5036,6 +5042,19 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr,
50365042

50375043
os << ")";
50385044
} break;
5045+
case UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP: {
5046+
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
5047+
if (sizeof(ur_bool_t) > size) {
5048+
os << "invalid size (is: " << size
5049+
<< ", expected: >=" << sizeof(ur_bool_t) << ")";
5050+
return UR_RESULT_ERROR_INVALID_SIZE;
5051+
}
5052+
os << (const void *)(tptr) << " (";
5053+
5054+
os << *tptr;
5055+
5056+
os << ")";
5057+
} break;
50395058
default:
50405059
os << "unknown enumerator";
50415060
return UR_RESULT_ERROR_INVALID_ENUMERATION;
@@ -17459,6 +17478,36 @@ inline std::ostream &operator<<(
1745917478
return os;
1746017479
}
1746117480

17481+
///////////////////////////////////////////////////////////////////////////////
17482+
/// @brief Print operator for the ur_usm_context_memcpy_exp_params_t type
17483+
/// @details Writes the four captured arguments as
///     `.hContext = <ptr>, .pDst = <ptr>, .pSrc = <ptr>, .size = <n>`.
///     NOTE(review): `params` and each of its member pointers are
///     dereferenced without a null check, so all of them must be non-null.
/// @returns
17484+
///     std::ostream &
17485+
inline std::ostream &operator<<(
17486+
std::ostream &os,
17487+
[[maybe_unused]] const struct ur_usm_context_memcpy_exp_params_t *params) {
17488+
17489+
// Context handle; formatting is delegated to ur::details::printPtr, which is
// defined outside this excerpt.
os << ".hContext = ";
17490+
17491+
ur::details::printPtr(os, *(params->phContext));
17492+
17493+
os << ", ";
17494+
// Destination pointer.
os << ".pDst = ";
17495+
17496+
ur::details::printPtr(os, *(params->ppDst));
17497+
17498+
os << ", ";
17499+
// Source pointer.
os << ".pSrc = ";
17500+
17501+
ur::details::printPtr(os, *(params->ppSrc));
17502+
17503+
os << ", ";
17504+
// Byte count, streamed directly as a size_t.
os << ".size = ";
17505+
17506+
os << *(params->psize);
17507+
17508+
return os;
17509+
}
17510+
1746217511
///////////////////////////////////////////////////////////////////////////////
1746317512
/// @brief Print operator for the ur_usm_import_exp_params_t type
1746417513
/// @returns
@@ -20708,6 +20757,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os,
2070820757
case UR_FUNCTION_USM_PITCHED_ALLOC_EXP: {
2070920758
os << (const struct ur_usm_pitched_alloc_exp_params_t *)params;
2071020759
} break;
20760+
case UR_FUNCTION_USM_CONTEXT_MEMCPY_EXP: {
20761+
os << (const struct ur_usm_context_memcpy_exp_params_t *)params;
20762+
} break;
2071120763
case UR_FUNCTION_USM_IMPORT_EXP: {
2071220764
os << (const struct ur_usm_import_exp_params_t *)params;
2071320765
} break;
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
<%
2+
OneApi=tags['$OneApi']
3+
x=tags['$x']
4+
X=x.upper()
5+
%>
6+
7+
.. _experimental-usm-context-memcpy:
8+
9+
================================================================================
10+
USM Context Memcpy
11+
================================================================================
12+
13+
.. warning::
14+
15+
Experimental features:
16+
17+
* May be replaced, updated, or removed at any time.
18+
* Do not require maintaining API/ABI stability of their own additions over
19+
time.
20+
* Do not require conformance testing of their own additions.
21+
22+
23+
Motivation
24+
--------------------------------------------------------------------------------
25+
26+
In order to support device globals there's a need for a blocking USM write
27+
operation that doesn't need a queue. This is to facilitate fast initialization
28+
of the device global memory via native APIs that enable this kind of operation.
29+
30+
API
31+
--------------------------------------------------------------------------------
32+
33+
Enums
34+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
35+
36+
* ${x}_device_info_t
37+
* ${X}_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP
38+
39+
Functions
40+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
41+
* ${x}USMContextMemcpyExp
42+
43+
Changelog
44+
--------------------------------------------------------------------------------
45+
46+
+-----------+---------------------------+
47+
| Revision | Changes |
48+
+===========+===========================+
49+
| 1.0 | Initial Draft |
50+
+-----------+---------------------------+
51+
52+
53+
Support
54+
--------------------------------------------------------------------------------
55+
56+
Adapters which support this experimental feature *must* return true for the new
57+
``${X}_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP`` device info query.
58+
59+
60+
Contributors
61+
--------------------------------------------------------------------------------
62+
63+
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#
2+
# Copyright (C) 2025 Intel Corporation
3+
#
4+
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
5+
# See LICENSE.TXT
6+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
#
8+
# See YaML.md for syntax definition
9+
#
10+
--- #--------------------------------------------------------------------------
11+
type: header
12+
desc: "Intel $OneApi Unified Runtime Experimental APIs for USM Context Memcpy"
13+
ordinal: "99"
14+
--- #--------------------------------------------------------------------------
15+
type: enum
16+
extend: true
17+
typed_etors: true
18+
desc: "Extension enums to $x_device_info_t to support $xUSMContextMemcpyExp"
19+
name: $x_device_info_t
20+
etors:
21+
- name: USM_CONTEXT_MEMCPY_SUPPORT_EXP
22+
value: "0x7000"
23+
desc: "[$x_bool_t] returns true if the device supports $xUSMContextMemcpyExp"
24+
--- #--------------------------------------------------------------------------
25+
type: function
26+
desc: "Perform a synchronous, blocking memcpy operation between USM allocations."
27+
class: $xUSM
28+
name: ContextMemcpyExp
29+
ordinal: "0"
30+
params:
31+
- type: $x_context_handle_t
32+
name: hContext
33+
desc: "[in] Context associated with the device(s) that own the allocations `pSrc` and `pDst`."
34+
- type: void*
35+
name: pDst
36+
desc: "[in] Destination pointer to copy to."
37+
- type: const void*
38+
name: pSrc
39+
desc: "[in] Source pointer to copy from."
40+
- type: size_t
41+
name: size
42+
desc: "[in] Size in bytes to be copied."
43+
returns:
44+
- $X_RESULT_SUCCESS
45+
- $X_RESULT_ERROR_ADAPTER_SPECIFIC

unified-runtime/scripts/core/registry.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,9 @@ etors:
637637
- name: USM_POOL_GET_INFO_EXP
638638
desc: Enumerator for $xUSMPoolGetInfoExp
639639
value: '262'
640+
- name: USM_CONTEXT_MEMCPY_EXP
641+
desc: Enumerator for $xUSMContextMemcpyExp
642+
value: '264'
640643
---
641644
type: enum
642645
desc: Defines structure types

unified-runtime/source/adapters/cuda/device.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,6 +1118,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
11181118
}
11191119
case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP:
11201120
return ReturnValue(false);
1121+
case UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP:
1122+
return ReturnValue(false);
11211123
default:
11221124
break;
11231125
}

unified-runtime/source/adapters/cuda/ur_interface_loader.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable(
369369
pDdiTable->pfnPoolSetDevicePoolExp = urUSMPoolSetDevicePoolExp;
370370
pDdiTable->pfnPoolGetDevicePoolExp = urUSMPoolGetDevicePoolExp;
371371
pDdiTable->pfnPoolTrimToExp = urUSMPoolTrimToExp;
372+
pDdiTable->pfnContextMemcpyExp = urUSMContextMemcpyExp;
372373
return UR_RESULT_SUCCESS;
373374
}
374375

unified-runtime/source/adapters/cuda/usm.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,3 +459,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolTrimToExp(ur_context_handle_t,
459459
size_t) {
460460
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
461461
}
462+
463+
// CUDA adapter entry point for urUSMContextMemcpyExp.
// Placeholder only: every argument is unnamed and ignored, and the call
// always returns UR_RESULT_ERROR_UNSUPPORTED_FEATURE. This matches the
// adapter's `false` answer to UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP.
UR_APIEXPORT ur_result_t UR_APICALL urUSMContextMemcpyExp(ur_context_handle_t,
464+
void *, const void *,
465+
size_t) {
466+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
467+
}

unified-runtime/source/adapters/hip/device.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1104,9 +1104,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
11041104
}
11051105
case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP:
11061106
return ReturnValue(false);
1107-
case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: {
1107+
case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP:
1108+
return ReturnValue(false);
1109+
case UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP:
11081110
return ReturnValue(false);
1109-
}
11101111
default:
11111112
break;
11121113
}

unified-runtime/source/adapters/hip/ur_interface_loader.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable(
367367
pDdiTable->pfnPoolSetDevicePoolExp = urUSMPoolSetDevicePoolExp;
368368
pDdiTable->pfnPoolGetDevicePoolExp = urUSMPoolGetDevicePoolExp;
369369
pDdiTable->pfnPoolTrimToExp = urUSMPoolTrimToExp;
370+
pDdiTable->pfnContextMemcpyExp = urUSMContextMemcpyExp;
370371
return UR_RESULT_SUCCESS;
371372
}
372373

unified-runtime/source/adapters/hip/usm.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,3 +524,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolTrimToExp(ur_context_handle_t,
524524
size_t) {
525525
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
526526
}
527+
528+
// HIP adapter entry point for urUSMContextMemcpyExp.
// Placeholder only: every argument is unnamed and ignored, and the call
// always returns UR_RESULT_ERROR_UNSUPPORTED_FEATURE. This matches the
// adapter's `false` answer to UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP.
UR_APIEXPORT ur_result_t UR_APICALL urUSMContextMemcpyExp(ur_context_handle_t,
529+
void *, const void *,
530+
size_t) {
531+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
532+
}

0 commit comments

Comments
 (0)