Skip to content

Commit 48f0e93

Browse files
[SYCL][UR][CUDA][HIP] Add support for command-buffer kernel updates (#15287)
Updates the call to urCommandBufferAppendKernelLaunchExp to use the new UR parameters. Corresponding UR PR: oneapi-src/unified-runtime#1924. Co-authored-by: Aaron Greig <[email protected]>
1 parent 489d95e commit 48f0e93

File tree

5 files changed

+36
-13
lines changed

5 files changed

+36
-13
lines changed

sycl/cmake/modules/FetchUnifiedRuntime.cmake

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,13 @@ if(SYCL_UR_USE_FETCH_CONTENT)
117117
endfunction()
118118

119119
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
120-
# commit 22962057df1b9d538e08088a7b75d9d8e7c29f90 (HEAD, origin/main, origin/HEAD)
121-
# Merge: e824ddc2 f0a1c433
120+
# commit 532a4ecb72da4876cef61a4ae4d638e27ad609d5
121+
# Merge: 22962057 d944ff33
122122
# Author: aarongreig <[email protected]>
123-
# Date: Fri Sep 27 16:54:04 2024 +0100
124-
# Merge pull request #2017 from nrspruit/new_sysman_init
125-
# [L0] Use zesInit for SysMan API usage
126-
set(UNIFIED_RUNTIME_TAG 22962057df1b9d538e08088a7b75d9d8e7c29f90)
123+
# Date: Mon Sep 30 10:43:10 2024 +0100
124+
# Merge pull request #1924 from Bensuo/fabio/cmd_buffer_kernel_update
125+
# Add support for command-buffer kernel updates
126+
set(UNIFIED_RUNTIME_TAG 532a4ecb72da4876cef61a4ae4d638e27ad609d5)
127127

128128
set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
129129
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need

sycl/source/detail/device_impl.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -704,17 +704,26 @@ bool device_impl::has(aspect Aspect) const {
704704
return CallSuccessful && Result != nullptr;
705705
}
706706
case aspect::ext_oneapi_graph: {
707-
bool SupportsCommandBufferUpdate = false;
707+
ur_device_command_buffer_update_capability_flags_t UpdateCapabilities;
708708
bool CallSuccessful =
709709
getAdapter()->call_nocheck<UrApiKind::urDeviceGetInfo>(
710-
MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP,
711-
sizeof(SupportsCommandBufferUpdate), &SupportsCommandBufferUpdate,
710+
MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP,
711+
sizeof(UpdateCapabilities), &UpdateCapabilities,
712712
nullptr) == UR_RESULT_SUCCESS;
713713
if (!CallSuccessful) {
714714
return false;
715715
}
716716

717-
return has(aspect::ext_oneapi_limited_graph) && SupportsCommandBufferUpdate;
717+
/* The kernel handle update capability is not yet required for the
718+
* ext_oneapi_graph aspect */
719+
ur_device_command_buffer_update_capability_flags_t RequiredCapabilities =
720+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS |
721+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE |
722+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE |
723+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET;
724+
725+
return has(aspect::ext_oneapi_limited_graph) &&
726+
(UpdateCapabilities & RequiredCapabilities) == RequiredCapabilities;
718727
}
719728
case aspect::ext_oneapi_limited_graph: {
720729
bool SupportsCommandBuffers = false;

sycl/source/detail/graph_impl.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1474,6 +1474,7 @@ void exec_graph_impl::updateImpl(std::shared_ptr<node_impl> Node) {
14741474
}
14751475
}
14761476

1477+
UpdateDesc.hNewKernel = UrKernel;
14771478
UpdateDesc.numNewMemObjArgs = MemobjDescs.size();
14781479
UpdateDesc.pNewMemObjArgList = MemobjDescs.data();
14791480
UpdateDesc.numNewPointerArgs = PtrDescs.size();

sycl/source/detail/scheduler/commands.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2538,7 +2538,7 @@ ur_result_t enqueueImpCommandBufferKernel(
25382538
ur_result_t Res =
25392539
Adapter->call_nocheck<UrApiKind::urCommandBufferAppendKernelLaunchExp>(
25402540
CommandBuffer, UrKernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0],
2541-
&NDRDesc.GlobalSize[0], LocalSize, SyncPoints.size(),
2541+
&NDRDesc.GlobalSize[0], LocalSize, 0, nullptr, SyncPoints.size(),
25422542
SyncPoints.size() ? SyncPoints.data() : nullptr, OutSyncPoint,
25432543
OutCommand);
25442544

sycl/unittests/helpers/UrMock.hpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,14 +200,27 @@ inline ur_result_t mock_urDeviceGetInfo(void *pParams) {
200200
case UR_DEVICE_INFO_AVAILABLE:
201201
case UR_DEVICE_INFO_LINKER_AVAILABLE:
202202
case UR_DEVICE_INFO_COMPILER_AVAILABLE:
203-
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
204-
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: {
203+
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: {
205204
if (*params->ppPropValue)
206205
*static_cast<ur_bool_t *>(*params->ppPropValue) = true;
207206
if (*params->ppPropSizeRet)
208207
**params->ppPropSizeRet = sizeof(true);
209208
return UR_RESULT_SUCCESS;
210209
}
210+
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: {
211+
if (*params->ppPropValue)
212+
*static_cast<ur_device_command_buffer_update_capability_flags_t *>(
213+
*params->ppPropValue) =
214+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS |
215+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE |
216+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE |
217+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET |
218+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE;
219+
if (*params->ppPropSizeRet)
220+
**params->ppPropSizeRet =
221+
sizeof(ur_device_command_buffer_update_capability_flags_t);
222+
return UR_RESULT_SUCCESS;
223+
}
211224
// This mock GPU device has no sub-devices
212225
case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: {
213226
if (*params->ppPropSizeRet) {

0 commit comments

Comments
 (0)