Skip to content

Commit 2b387e4

Browse files
committed
[SYCL] Support sycl::kernel_bundle for multi-device scenario
This PR includes: * Changes in the program manager methods to be able to properly create/build UR program for multiple devices. So far, we were mostly using only the first device in the vector to create/build UR program which made UR program unusable on other devices. * For SPIRV case we already have all necessary UR fuctions for multi-device case: urProgramBuildExp, urProgramLinkExp. For AOT case we need to add new function urProgramCreateWithBinaryExp which allows to create UR program from multiple device binaries. Hence the UR tag update. * Our program cache key allowed only a single device. I have changed it to contain a set of devices. If UR program is created and built for a set of devices then the same UR program is usable whenver we have any subset of this set. That's why if we have a program built for a set of devices then add all subsets to the cache. Before we were adding a record to the cache for each device from the set which is incorrect. For example, if someone requests a UR program for {dev2, dev3} from the cache then it is expected that this UR progam must be usable to submit a kernel to dev3. But we could get a program for {dev1, dev2} from the cache which is unusable on dev3.
1 parent a04915e commit 2b387e4

File tree

15 files changed

+550
-201
lines changed

15 files changed

+550
-201
lines changed

sycl/cmake/modules/FetchUnifiedRuntime.cmake

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -116,14 +116,8 @@ if(SYCL_UR_USE_FETCH_CONTENT)
116116
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
117117
endfunction()
118118

119-
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
120-
# commit 22962057df1b9d538e08088a7b75d9d8e7c29f90 (HEAD, origin/main, origin/HEAD)
121-
# Merge: e824ddc2 f0a1c433
122-
# Author: aarongreig <[email protected]>
123-
# Date: Fri Sep 27 16:54:04 2024 +0100
124-
# Merge pull request #2017 from nrspruit/new_sysman_init
125-
# [L0] Use zesInit for SysMan API usage
126-
set(UNIFIED_RUNTIME_TAG 22962057df1b9d538e08088a7b75d9d8e7c29f90)
119+
set(UNIFIED_RUNTIME_REPO "https://github.com/againull/unified-runtime")
120+
set(UNIFIED_RUNTIME_TAG 2b1cf6ee0bb43e0a94359eda5e163445d691b42a)
127121

128122
set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
129123
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need

sycl/source/detail/context_impl.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <sycl/property_list.hpp>
2222

2323
#include <algorithm>
24+
#include <set>
2425

2526
namespace sycl {
2627
inline namespace _V1 {
@@ -490,7 +491,7 @@ std::optional<ur_program_handle_t> context_impl::getProgramForDevImgs(
490491
auto &Cache = LockedCache.get().Cache;
491492
ur_device_handle_t &DevHandle = getSyclObjImpl(Device)->getHandleRef();
492493
for (std::uintptr_t ImageIDs : ImgIdentifiers) {
493-
auto OuterKey = std::make_pair(ImageIDs, DevHandle);
494+
auto OuterKey = std::make_pair(ImageIDs, std::set{DevHandle});
494495
size_t NProgs = KeyMap.count(OuterKey);
495496
if (NProgs == 0)
496497
continue;

sycl/source/detail/helpers.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName,
6565
auto DeviceImpl = Queue->getDeviceImplPtr();
6666
auto Device = detail::createSyclObjFromImpl<device>(DeviceImpl);
6767
ur_program_handle_t Program =
68-
detail::ProgramManager::getInstance().createURProgram(**DeviceImage,
69-
Context, Device);
68+
detail::ProgramManager::getInstance().createURProgram(
69+
**DeviceImage, Context, {Device});
7070
return {*DeviceImage, Program};
7171
}
7272

@@ -94,7 +94,7 @@ retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName,
9494
DeviceImage = &detail::ProgramManager::getInstance().getDeviceImage(
9595
KernelName, Context, Device);
9696
Program = detail::ProgramManager::getInstance().createURProgram(
97-
*DeviceImage, Context, Device);
97+
*DeviceImage, Context, {Device});
9898
}
9999
return {DeviceImage, Program};
100100
}

sycl/source/detail/kernel_program_cache.hpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <atomic>
2121
#include <condition_variable>
2222
#include <mutex>
23+
#include <set>
2324
#include <type_traits>
2425

2526
#include <boost/unordered/unordered_flat_map.hpp>
@@ -118,9 +119,10 @@ class KernelProgramCache {
118119
* when debugging environment variables are set and we can just ignore them
119120
* since all kernels will have their build options overridden with the same
120121
* string*/
121-
using ProgramCacheKeyT =
122-
std::pair<std::pair<SerializedObj, std::uintptr_t>, ur_device_handle_t>;
123-
using CommonProgramKeyT = std::pair<std::uintptr_t, ur_device_handle_t>;
122+
using ProgramCacheKeyT = std::pair<std::pair<SerializedObj, std::uintptr_t>,
123+
std::set<ur_device_handle_t>>;
124+
using CommonProgramKeyT =
125+
std::pair<std::uintptr_t, std::set<ur_device_handle_t>>;
124126

125127
struct ProgramCache {
126128
::boost::unordered_map<ProgramCacheKeyT, ProgramBuildResultPtr> Cache;

0 commit comments

Comments
 (0)