Skip to content

Commit f867e71

Browse files
committed
[UR][Cuda] Add support for command-buffer kernel updates
1 parent 71bc163 commit f867e71

File tree

5 files changed

+32
-9
lines changed

5 files changed

+32
-9
lines changed

sycl/cmake/modules/FetchUnifiedRuntime.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,15 +116,15 @@ if(SYCL_UR_USE_FETCH_CONTENT)
116116
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
117117
endfunction()
118118

119-
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
119+
set(UNIFIED_RUNTIME_REPO "https://github.com/Bensuo/unified-runtime.git")
120120
# commit 2ad32681efd2c977f2c1f7f3a30d572d4c15499c (describes the previous tag, not the d944ff3 tag now set below)
121121
# Author: Hugh Delaney <[email protected]>
122122
# Date: Wed Sep 25 15:55:05 2024 +0100
123123
# Associate queue with device in context (#1992)
124124
# Making a native queue doesn't require hDevice to be non null, but this
125125
# associates the queue with a null device, even if hContext contains valid
126126
# devices.
127-
set(UNIFIED_RUNTIME_TAG 2ad32681efd2c977f2c1f7f3a30d572d4c15499c)
127+
set(UNIFIED_RUNTIME_TAG d944ff3391dfbe69db453406bd0bbcb78716dee0)
128128

129129
set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
130130
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need

sycl/source/detail/device_impl.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -704,17 +704,26 @@ bool device_impl::has(aspect Aspect) const {
704704
return CallSuccessful && Result != nullptr;
705705
}
706706
case aspect::ext_oneapi_graph: {
707-
bool SupportsCommandBufferUpdate = false;
707+
ur_device_command_buffer_update_capability_flags_t UpdateCapabilities;
708708
bool CallSuccessful =
709709
getAdapter()->call_nocheck<UrApiKind::urDeviceGetInfo>(
710-
MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP,
711-
sizeof(SupportsCommandBufferUpdate), &SupportsCommandBufferUpdate,
710+
MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP,
711+
sizeof(UpdateCapabilities), &UpdateCapabilities,
712712
nullptr) == UR_RESULT_SUCCESS;
713713
if (!CallSuccessful) {
714714
return false;
715715
}
716716

717-
return has(aspect::ext_oneapi_limited_graph) && SupportsCommandBufferUpdate;
717+
/* The kernel handle update capability is not yet required for the
718+
* ext_oneapi_graph aspect */
719+
ur_device_command_buffer_update_capability_flags_t RequiredCapabilities =
720+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS |
721+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE |
722+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE |
723+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET;
724+
725+
return has(aspect::ext_oneapi_limited_graph) &&
726+
(UpdateCapabilities & RequiredCapabilities) == RequiredCapabilities;
718727
}
719728
case aspect::ext_oneapi_limited_graph: {
720729
bool SupportsCommandBuffers = false;

sycl/source/detail/graph_impl.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1474,6 +1474,7 @@ void exec_graph_impl::updateImpl(std::shared_ptr<node_impl> Node) {
14741474
}
14751475
}
14761476

1477+
UpdateDesc.hNewKernel = UrKernel;
14771478
UpdateDesc.numNewMemObjArgs = MemobjDescs.size();
14781479
UpdateDesc.pNewMemObjArgList = MemobjDescs.data();
14791480
UpdateDesc.numNewPointerArgs = PtrDescs.size();

sycl/source/detail/scheduler/commands.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2538,7 +2538,7 @@ ur_result_t enqueueImpCommandBufferKernel(
25382538
ur_result_t Res =
25392539
Adapter->call_nocheck<UrApiKind::urCommandBufferAppendKernelLaunchExp>(
25402540
CommandBuffer, UrKernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0],
2541-
&NDRDesc.GlobalSize[0], LocalSize, SyncPoints.size(),
2541+
&NDRDesc.GlobalSize[0], LocalSize, 0, nullptr, SyncPoints.size(),
25422542
SyncPoints.size() ? SyncPoints.data() : nullptr, OutSyncPoint,
25432543
OutCommand);
25442544

sycl/unittests/helpers/UrMock.hpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,14 +200,27 @@ inline ur_result_t mock_urDeviceGetInfo(void *pParams) {
200200
case UR_DEVICE_INFO_AVAILABLE:
201201
case UR_DEVICE_INFO_LINKER_AVAILABLE:
202202
case UR_DEVICE_INFO_COMPILER_AVAILABLE:
203-
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
204-
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: {
203+
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: {
205204
if (*params->ppPropValue)
206205
*static_cast<ur_bool_t *>(*params->ppPropValue) = true;
207206
if (*params->ppPropSizeRet)
208207
**params->ppPropSizeRet = sizeof(true);
209208
return UR_RESULT_SUCCESS;
210209
}
210+
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: {
211+
if (*params->ppPropValue)
212+
*static_cast<ur_device_command_buffer_update_capability_flags_t *>(
213+
*params->ppPropValue) =
214+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS |
215+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE |
216+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE |
217+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET |
218+
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE;
219+
if (*params->ppPropSizeRet)
220+
**params->ppPropSizeRet =
221+
sizeof(ur_device_command_buffer_update_capability_flags_t);
222+
return UR_RESULT_SUCCESS;
223+
}
211224
// This mock GPU device has no sub-devices
212225
case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: {
213226
if (*params->ppPropSizeRet) {

0 commit comments

Comments
 (0)