Skip to content

[UR][Cuda][Hip] Add support for command-buffer kernel updates #15287

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,13 @@ if(SYCL_UR_USE_FETCH_CONTENT)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit 22962057df1b9d538e08088a7b75d9d8e7c29f90 (HEAD, origin/main, origin/HEAD)
# Merge: e824ddc2 f0a1c433
# commit 532a4ecb72da4876cef61a4ae4d638e27ad609d5
# Merge: 22962057 d944ff33
# Author: aarongreig <[email protected]>
# Date: Fri Sep 27 16:54:04 2024 +0100
# Merge pull request #2017 from nrspruit/new_sysman_init
# [L0] Use zesInit for SysMan API usage
set(UNIFIED_RUNTIME_TAG 22962057df1b9d538e08088a7b75d9d8e7c29f90)
# Date: Mon Sep 30 10:43:10 2024 +0100
# Merge pull request #1924 from Bensuo/fabio/cmd_buffer_kernel_update
# Add support for command-buffer kernel updates
set(UNIFIED_RUNTIME_TAG 532a4ecb72da4876cef61a4ae4d638e27ad609d5)

set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need
Expand Down
17 changes: 13 additions & 4 deletions sycl/source/detail/device_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -704,17 +704,26 @@ bool device_impl::has(aspect Aspect) const {
return CallSuccessful && Result != nullptr;
}
case aspect::ext_oneapi_graph: {
bool SupportsCommandBufferUpdate = false;
ur_device_command_buffer_update_capability_flags_t UpdateCapabilities;
bool CallSuccessful =
getAdapter()->call_nocheck<UrApiKind::urDeviceGetInfo>(
MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP,
sizeof(SupportsCommandBufferUpdate), &SupportsCommandBufferUpdate,
MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP,
sizeof(UpdateCapabilities), &UpdateCapabilities,
nullptr) == UR_RESULT_SUCCESS;
if (!CallSuccessful) {
return false;
}

return has(aspect::ext_oneapi_limited_graph) && SupportsCommandBufferUpdate;
/* The kernel handle update capability is not yet required for the
* ext_oneapi_graph aspect */
ur_device_command_buffer_update_capability_flags_t RequiredCapabilities =
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS |
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE |
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE |
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET;

return has(aspect::ext_oneapi_limited_graph) &&
(UpdateCapabilities & RequiredCapabilities) == RequiredCapabilities;
}
case aspect::ext_oneapi_limited_graph: {
bool SupportsCommandBuffers = false;
Expand Down
1 change: 1 addition & 0 deletions sycl/source/detail/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1474,6 +1474,7 @@ void exec_graph_impl::updateImpl(std::shared_ptr<node_impl> Node) {
}
}

UpdateDesc.hNewKernel = UrKernel;
UpdateDesc.numNewMemObjArgs = MemobjDescs.size();
UpdateDesc.pNewMemObjArgList = MemobjDescs.data();
UpdateDesc.numNewPointerArgs = PtrDescs.size();
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2538,7 +2538,7 @@ ur_result_t enqueueImpCommandBufferKernel(
ur_result_t Res =
Adapter->call_nocheck<UrApiKind::urCommandBufferAppendKernelLaunchExp>(
CommandBuffer, UrKernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0],
&NDRDesc.GlobalSize[0], LocalSize, SyncPoints.size(),
&NDRDesc.GlobalSize[0], LocalSize, 0, nullptr, SyncPoints.size(),
SyncPoints.size() ? SyncPoints.data() : nullptr, OutSyncPoint,
OutCommand);

Expand Down
17 changes: 15 additions & 2 deletions sycl/unittests/helpers/UrMock.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,14 +200,27 @@ inline ur_result_t mock_urDeviceGetInfo(void *pParams) {
case UR_DEVICE_INFO_AVAILABLE:
case UR_DEVICE_INFO_LINKER_AVAILABLE:
case UR_DEVICE_INFO_COMPILER_AVAILABLE:
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: {
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: {
if (*params->ppPropValue)
*static_cast<ur_bool_t *>(*params->ppPropValue) = true;
if (*params->ppPropSizeRet)
**params->ppPropSizeRet = sizeof(true);
return UR_RESULT_SUCCESS;
}
// Mock answer for the command-buffer update capabilities query: the value
// written is a bitmask of the five capability flags OR-ed together below
// (kernel arguments, global work size, local work size, global work offset
// and kernel handle).
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: {
// Only write the mask when the value pointer is non-null.
if (*params->ppPropValue)
*static_cast<ur_device_command_buffer_update_capability_flags_t *>(
*params->ppPropValue) =
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS |
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE |
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE |
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET |
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE;
// Only report the size when the size-return pointer is non-null; the size is
// that of the flags type, matching what is stored above.
if (*params->ppPropSizeRet)
**params->ppPropSizeRet =
sizeof(ur_device_command_buffer_update_capability_flags_t);
return UR_RESULT_SUCCESS;
}
// This mock GPU device has no sub-devices
case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: {
if (*params->ppPropSizeRet) {
Expand Down
Loading