Skip to content

Commit 47f5338

Browse files
[SYCL] Use shared_ptr instead of manual changing UR counters (#18565)
Keep kernel and program handle in a structure that lifetime is controlled by shared_ptr. This is faster wrt current implementation, because only one atomic operation is required for copying-destroying shared_ptr, while pair of kernel/program retain/release calls requires 2 atomic operations in the best case. --------- Signed-off-by: Alexandr Konovalov <[email protected]> Co-authored-by: aelovikov-intel <[email protected]>
1 parent c096d26 commit 47f5338

File tree

9 files changed

+100
-100
lines changed

9 files changed

+100
-100
lines changed

sycl/source/detail/graph_impl.cpp

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1480,8 +1480,7 @@ bool exec_graph_impl::needsScheduledUpdate(
14801480
}
14811481

14821482
void exec_graph_impl::populateURKernelUpdateStructs(
1483-
const std::shared_ptr<node_impl> &Node,
1484-
std::pair<ur_program_handle_t, ur_kernel_handle_t> &BundleObjs,
1483+
const std::shared_ptr<node_impl> &Node, FastKernelCacheValPtr &BundleObjs,
14851484
std::vector<ur_exp_command_buffer_update_memobj_arg_desc_t> &MemobjDescs,
14861485
std::vector<ur_kernel_arg_mem_obj_properties_t> &MemobjProps,
14871486
std::vector<ur_exp_command_buffer_update_pointer_arg_desc_t> &PtrDescs,
@@ -1517,12 +1516,11 @@ void exec_graph_impl::populateURKernelUpdateStructs(
15171516
UrKernel = SyclKernelImpl->getHandleRef();
15181517
EliminatedArgMask = SyclKernelImpl->getKernelArgMask();
15191518
} else {
1520-
ur_program_handle_t UrProgram = nullptr;
1521-
std::tie(UrKernel, std::ignore, EliminatedArgMask, UrProgram) =
1522-
sycl::detail::ProgramManager::getInstance().getOrCreateKernel(
1523-
ContextImpl, DeviceImpl, ExecCG.MKernelName,
1524-
ExecCG.MKernelNameBasedCachePtr);
1525-
BundleObjs = std::make_pair(UrProgram, UrKernel);
1519+
BundleObjs = sycl::detail::ProgramManager::getInstance().getOrCreateKernel(
1520+
ContextImpl, DeviceImpl, ExecCG.MKernelName,
1521+
ExecCG.MKernelNameBasedCachePtr);
1522+
UrKernel = BundleObjs->MKernelHandle;
1523+
EliminatedArgMask = BundleObjs->MKernelArgMask;
15261524
}
15271525

15281526
// Remove eliminated args
@@ -1717,8 +1715,7 @@ void exec_graph_impl::updateURImpl(
17171715
std::vector<sycl::detail::NDRDescT> NDRDescList(NumUpdatableNodes);
17181716
std::vector<ur_exp_command_buffer_update_kernel_launch_desc_t> UpdateDescList(
17191717
NumUpdatableNodes);
1720-
std::vector<std::pair<ur_program_handle_t, ur_kernel_handle_t>>
1721-
KernelBundleObjList(NumUpdatableNodes);
1718+
std::vector<FastKernelCacheValPtr> KernelBundleObjList(NumUpdatableNodes);
17221719

17231720
size_t StructListIndex = 0;
17241721
for (auto &Node : Nodes) {
@@ -1743,17 +1740,6 @@ void exec_graph_impl::updateURImpl(
17431740
const sycl::detail::AdapterPtr &Adapter = ContextImpl->getAdapter();
17441741
Adapter->call<sycl::detail::UrApiKind::urCommandBufferUpdateKernelLaunchExp>(
17451742
CommandBuffer, UpdateDescList.size(), UpdateDescList.data());
1746-
1747-
for (auto &BundleObjs : KernelBundleObjList) {
1748-
// We retained these objects by inside populateUpdateStruct() by calling
1749-
// getOrCreateKernel()
1750-
if (auto &UrKernel = BundleObjs.second; nullptr != UrKernel) {
1751-
Adapter->call<sycl::detail::UrApiKind::urKernelRelease>(UrKernel);
1752-
}
1753-
if (auto &UrProgram = BundleObjs.first; nullptr != UrProgram) {
1754-
Adapter->call<sycl::detail::UrApiKind::urProgramRelease>(UrProgram);
1755-
}
1756-
}
17571743
}
17581744

17591745
modifiable_command_graph::modifiable_command_graph(

sycl/source/detail/graph_impl.hpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1521,8 +1521,7 @@ class exec_graph_impl {
15211521
/// @param[out] NDRDesc ND-Range to update.
15221522
/// @param[out] UpdateDesc Base struct in the pointer chain.
15231523
void populateURKernelUpdateStructs(
1524-
const std::shared_ptr<node_impl> &Node,
1525-
std::pair<ur_program_handle_t, ur_kernel_handle_t> &BundleObjs,
1524+
const std::shared_ptr<node_impl> &Node, FastKernelCacheValPtr &BundleObjs,
15261525
std::vector<ur_exp_command_buffer_update_memobj_arg_desc_t> &MemobjDescs,
15271526
std::vector<ur_kernel_arg_mem_obj_properties_t> &MemobjProps,
15281527
std::vector<ur_exp_command_buffer_update_pointer_arg_desc_t> &PtrDescs,

sycl/source/detail/kernel_name_based_cache_t.hpp

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,47 @@ namespace sycl {
2020
inline namespace _V1 {
2121
namespace detail {
2222
using FastKernelCacheKeyT = std::pair<ur_device_handle_t, ur_context_handle_t>;
23-
using FastKernelCacheValT =
24-
std::tuple<ur_kernel_handle_t, std::mutex *, const KernelArgMask *,
25-
ur_program_handle_t>;
23+
24+
struct FastKernelCacheVal {
25+
ur_kernel_handle_t MKernelHandle; /* UR kernel handle pointer. */
26+
std::mutex *MMutex; /* Mutex guarding this kernel. When
27+
caching is disabled, the pointer is
28+
nullptr. */
29+
const KernelArgMask *MKernelArgMask; /* Eliminated kernel argument mask. */
30+
ur_program_handle_t MProgramHandle; /* UR program handle corresponding to
31+
this kernel. */
32+
const Adapter &MAdapterPtr; /* We can keep reference to the adapter
33+
because during 2-stage shutdown the kernel
34+
cache is destroyed deliberately before the
35+
adapter. */
36+
37+
FastKernelCacheVal(ur_kernel_handle_t KernelHandle, std::mutex *Mutex,
38+
const KernelArgMask *KernelArgMask,
39+
ur_program_handle_t ProgramHandle,
40+
const Adapter &AdapterPtr)
41+
: MKernelHandle(KernelHandle), MMutex(Mutex),
42+
MKernelArgMask(KernelArgMask), MProgramHandle(ProgramHandle),
43+
MAdapterPtr(AdapterPtr) {}
44+
45+
~FastKernelCacheVal() {
46+
if (MKernelHandle)
47+
MAdapterPtr.call<sycl::detail::UrApiKind::urKernelRelease>(MKernelHandle);
48+
if (MProgramHandle)
49+
MAdapterPtr.call<sycl::detail::UrApiKind::urProgramRelease>(
50+
MProgramHandle);
51+
MKernelHandle = nullptr;
52+
MMutex = nullptr;
53+
MKernelArgMask = nullptr;
54+
MProgramHandle = nullptr;
55+
}
56+
57+
FastKernelCacheVal(const FastKernelCacheVal &) = delete;
58+
FastKernelCacheVal &operator=(const FastKernelCacheVal &) = delete;
59+
};
60+
using FastKernelCacheValPtr = std::shared_ptr<FastKernelCacheVal>;
61+
2662
using FastKernelSubcacheMapT =
27-
::boost::unordered_flat_map<FastKernelCacheKeyT, FastKernelCacheValT>;
63+
::boost::unordered_flat_map<FastKernelCacheKeyT, FastKernelCacheValPtr>;
2864

2965
using FastKernelSubcacheMutexT = SpinLock;
3066
using FastKernelSubcacheReadLockT = std::lock_guard<FastKernelSubcacheMutexT>;

sycl/source/detail/kernel_program_cache.hpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,7 @@ class KernelProgramCache {
468468
return std::make_pair(It->second, DidInsert);
469469
}
470470

471-
FastKernelCacheValT
471+
FastKernelCacheValPtr
472472
tryToGetKernelFast(KernelNameStrRefT KernelName, ur_device_handle_t Device,
473473
FastKernelSubcacheT *KernelSubcacheHint) {
474474
FastKernelCacheWriteLockT Lock(MFastKernelCacheMutex);
@@ -486,27 +486,27 @@ class KernelProgramCache {
486486
traceKernel("Kernel fetched.", KernelName, true);
487487
return It->second;
488488
}
489-
return std::make_tuple(nullptr, nullptr, nullptr, nullptr);
489+
return FastKernelCacheValPtr();
490490
}
491491

492492
void saveKernel(KernelNameStrRefT KernelName, ur_device_handle_t Device,
493-
FastKernelCacheValT CacheVal,
493+
const FastKernelCacheValPtr &CacheVal,
494494
FastKernelSubcacheT *KernelSubcacheHint) {
495-
ur_program_handle_t Program = std::get<3>(CacheVal);
496495
if (SYCLConfig<SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD>::
497496
isProgramCacheEvictionEnabled()) {
498497
// Save kernel in fast cache only if the corresponding program is also
499498
// in the cache.
500499
auto LockedCache = acquireCachedPrograms();
501500
auto &ProgCache = LockedCache.get();
502-
if (ProgCache.ProgramSizeMap.find(Program) ==
501+
if (ProgCache.ProgramSizeMap.find(CacheVal->MProgramHandle) ==
503502
ProgCache.ProgramSizeMap.end())
504503
return;
505504
}
506505

507506
// Save reference between the program and the fast cache key.
508507
FastKernelCacheWriteLockT Lock(MFastKernelCacheMutex);
509-
MProgramToFastKernelCacheKeyMap[Program].emplace_back(KernelName, Device);
508+
MProgramToFastKernelCacheKeyMap[CacheVal->MProgramHandle].emplace_back(
509+
KernelName, Device);
510510

511511
// if no insertion took place, then some other thread has already inserted
512512
// smth in the cache
@@ -518,7 +518,7 @@ class KernelProgramCache {
518518
FastKernelSubcacheWriteLockT SubcacheLock{KernelSubcacheHint->Mutex};
519519
ur_context_handle_t Context = getURContext();
520520
KernelSubcacheHint->Map.emplace(FastKernelCacheKeyT(Device, Context),
521-
std::move(CacheVal));
521+
CacheVal);
522522
}
523523

524524
// Expects locked program cache

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 14 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
#include <detail/device_impl.hpp>
1414
#include <detail/event_impl.hpp>
1515
#include <detail/global_handler.hpp>
16-
#include <detail/kernel_name_based_cache_t.hpp>
1716
#include <detail/persistent_device_code_cache.hpp>
1817
#include <detail/platform_impl.hpp>
1918
#include <detail/program_manager/program_manager.hpp>
@@ -1108,11 +1107,8 @@ ur_program_handle_t ProgramManager::getBuiltURProgram(
11081107
Adapter->call<UrApiKind::urProgramRetain>(ResProgram);
11091108
return ResProgram;
11101109
}
1111-
// When caching is enabled, the returned UrProgram and UrKernel will
1112-
// already have their ref count incremented.
1113-
std::tuple<ur_kernel_handle_t, std::mutex *, const KernelArgMask *,
1114-
ur_program_handle_t>
1115-
ProgramManager::getOrCreateKernel(
1110+
1111+
FastKernelCacheValPtr ProgramManager::getOrCreateKernel(
11161112
const ContextImplPtr &ContextImpl, device_impl &DeviceImpl,
11171113
KernelNameStrRefT KernelName,
11181114
KernelNameBasedCacheT *KernelNameBasedCachePtr, const NDRDescT &NDRDesc) {
@@ -1129,18 +1125,11 @@ ProgramManager::getOrCreateKernel(
11291125
KernelNameBasedCachePtr ? &KernelNameBasedCachePtr->FastKernelSubcache
11301126
: nullptr;
11311127
if (SYCLConfig<SYCL_CACHE_IN_MEM>::get()) {
1132-
auto ret_tuple =
1128+
auto KernelCacheValPtr =
11331129
Cache.tryToGetKernelFast(KernelName, UrDevice, CacheHintPtr);
1134-
constexpr size_t Kernel = 0; // see FastKernelCacheValT tuple
1135-
constexpr size_t Program = 3; // see FastKernelCacheValT tuple
1136-
if (std::get<Kernel>(ret_tuple)) {
1137-
// Pulling a copy of a kernel and program from the cache,
1138-
// so we need to retain those resources.
1139-
ContextImpl->getAdapter()->call<UrApiKind::urKernelRetain>(
1140-
std::get<Kernel>(ret_tuple));
1141-
ContextImpl->getAdapter()->call<UrApiKind::urProgramRetain>(
1142-
std::get<Program>(ret_tuple));
1143-
return ret_tuple;
1130+
if (auto KernelCacheValPtr =
1131+
Cache.tryToGetKernelFast(KernelName, UrDevice, CacheHintPtr)) {
1132+
return KernelCacheValPtr;
11441133
}
11451134
}
11461135

@@ -1179,20 +1168,21 @@ ProgramManager::getOrCreateKernel(
11791168
// threads when caching is disabled, so we can return
11801169
// nullptr for the mutex.
11811170
auto [Kernel, ArgMask] = BuildF();
1182-
return make_tuple(Kernel, nullptr, ArgMask, Program);
1171+
return std::make_shared<FastKernelCacheVal>(
1172+
Kernel, nullptr, ArgMask, Program, *ContextImpl->getAdapter().get());
11831173
}
11841174

11851175
auto BuildResult = Cache.getOrBuild<errc::invalid>(GetCachedBuildF, BuildF);
11861176
// getOrBuild is not supposed to return nullptr
11871177
assert(BuildResult != nullptr && "Invalid build result");
11881178
const KernelArgMaskPairT &KernelArgMaskPair = BuildResult->Val;
1189-
auto ret_val = std::make_tuple(KernelArgMaskPair.first,
1190-
&(BuildResult->MBuildResultMutex),
1191-
KernelArgMaskPair.second, Program);
1179+
auto ret_val = std::make_shared<FastKernelCacheVal>(
1180+
KernelArgMaskPair.first, &(BuildResult->MBuildResultMutex),
1181+
KernelArgMaskPair.second, Program, *ContextImpl->getAdapter().get());
11921182
// If caching is enabled, one copy of the kernel handle will be
1193-
// stored in the cache, and one handle is returned to the
1194-
// caller. In that case, we need to increase the ref count of the
1195-
// kernel.
1183+
// stored in FastKernelCacheVal, and one is in
1184+
// KernelProgramCache::MKernelsPerProgramCache. To cover
1185+
// MKernelsPerProgramCache, we need to increase the ref count of the kernel.
11961186
ContextImpl->getAdapter()->call<UrApiKind::urKernelRetain>(
11971187
KernelArgMaskPair.first);
11981188
Cache.saveKernel(KernelName, UrDevice, ret_val, CacheHintPtr);

sycl/source/detail/program_manager/program_manager.hpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,7 @@ class ProgramManager {
198198
const DevImgPlainWithDeps *DevImgWithDeps = nullptr,
199199
const SerializedObj &SpecConsts = {});
200200

201-
std::tuple<ur_kernel_handle_t, std::mutex *, const KernelArgMask *,
202-
ur_program_handle_t>
201+
FastKernelCacheValPtr
203202
getOrCreateKernel(const ContextImplPtr &ContextImpl, device_impl &DeviceImpl,
204203
KernelNameStrRefT KernelName,
205204
KernelNameBasedCacheT *KernelNameBasedCachePtr,

sycl/source/detail/scheduler/commands.cpp

Lines changed: 22 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1991,8 +1991,6 @@ void instrumentationAddExtraKernelMetadata(
19911991
auto FilterArgs = [&Args](detail::ArgDesc &Arg, int NextTrueIndex) {
19921992
Args.push_back({Arg.MType, Arg.MPtr, Arg.MSize, NextTrueIndex});
19931993
};
1994-
ur_kernel_handle_t Kernel = nullptr;
1995-
std::mutex *KernelMutex = nullptr;
19961994
const KernelArgMask *EliminatedArgMask = nullptr;
19971995

19981996
if (nullptr != SyclKernel) {
@@ -2007,11 +2005,11 @@ void instrumentationAddExtraKernelMetadata(
20072005
// NOTE: Queue can be null when kernel is directly enqueued to a command
20082006
// buffer
20092007
// by graph API, when a modifiable graph is finalized.
2010-
ur_program_handle_t Program = nullptr;
2011-
std::tie(Kernel, KernelMutex, EliminatedArgMask, Program) =
2008+
FastKernelCacheValPtr FastKernelCacheVal =
20122009
detail::ProgramManager::getInstance().getOrCreateKernel(
20132010
Queue->getContextImplPtr(), Queue->getDeviceImpl(), KernelName,
20142011
KernelNameBasedCachePtr);
2012+
EliminatedArgMask = FastKernelCacheVal->MKernelArgMask;
20152013
}
20162014

20172015
applyFuncOnFilteredArgs(EliminatedArgMask, CGArgs, FilterArgs);
@@ -2522,8 +2520,7 @@ static std::tuple<ur_kernel_handle_t, std::shared_ptr<device_image_impl>,
25222520
const KernelArgMask *>
25232521
getCGKernelInfo(const CGExecKernel &CommandGroup, ContextImplPtr ContextImpl,
25242522
device_impl &DeviceImpl,
2525-
std::vector<ur_kernel_handle_t> &UrKernelsToRelease,
2526-
std::vector<ur_program_handle_t> &UrProgramsToRelease) {
2523+
std::vector<FastKernelCacheValPtr> &KernelCacheValsToRelease) {
25272524

25282525
ur_kernel_handle_t UrKernel = nullptr;
25292526
std::shared_ptr<device_image_impl> DeviceImageImpl = nullptr;
@@ -2542,13 +2539,14 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, ContextImplPtr ContextImpl,
25422539
DeviceImageImpl = SyclKernelImpl->getDeviceImage();
25432540
EliminatedArgMask = SyclKernelImpl->getKernelArgMask();
25442541
} else {
2545-
ur_program_handle_t UrProgram = nullptr;
2546-
std::tie(UrKernel, std::ignore, EliminatedArgMask, UrProgram) =
2542+
FastKernelCacheValPtr FastKernelCacheVal =
25472543
sycl::detail::ProgramManager::getInstance().getOrCreateKernel(
25482544
ContextImpl, DeviceImpl, CommandGroup.MKernelName,
25492545
CommandGroup.MKernelNameBasedCachePtr);
2550-
UrKernelsToRelease.push_back(UrKernel);
2551-
UrProgramsToRelease.push_back(UrProgram);
2546+
UrKernel = FastKernelCacheVal->MKernelHandle;
2547+
EliminatedArgMask = FastKernelCacheVal->MKernelArgMask;
2548+
// To keep UrKernel valid, we return FastKernelCacheValPtr.
2549+
KernelCacheValsToRelease.push_back(std::move(FastKernelCacheVal));
25522550
}
25532551
return std::make_tuple(UrKernel, DeviceImageImpl, EliminatedArgMask);
25542552
}
@@ -2561,20 +2559,18 @@ ur_result_t enqueueImpCommandBufferKernel(
25612559
ur_exp_command_buffer_sync_point_t *OutSyncPoint,
25622560
ur_exp_command_buffer_command_handle_t *OutCommand,
25632561
const std::function<void *(Requirement *Req)> &getMemAllocationFunc) {
2564-
// List of ur objects to be released after UR call. We don't do anything
2565-
// with the ur_program_handle_t objects, but need to update their reference
2566-
// count.
2567-
std::vector<ur_kernel_handle_t> UrKernelsToRelease;
2568-
std::vector<ur_program_handle_t> UrProgramsToRelease;
2562+
// List of fast cache elements to be released after UR call. We don't do
2563+
// anything with them, but they must exist to keep ur_kernel_handle_t-s
2564+
// valid.
2565+
std::vector<FastKernelCacheValPtr> FastKernelCacheValsToRelease;
25692566

25702567
ur_kernel_handle_t UrKernel = nullptr;
25712568
std::shared_ptr<device_image_impl> DeviceImageImpl = nullptr;
25722569
const KernelArgMask *EliminatedArgMask = nullptr;
25732570

25742571
auto ContextImpl = sycl::detail::getSyclObjImpl(Ctx);
2575-
std::tie(UrKernel, DeviceImageImpl, EliminatedArgMask) =
2576-
getCGKernelInfo(CommandGroup, ContextImpl, DeviceImpl, UrKernelsToRelease,
2577-
UrProgramsToRelease);
2572+
std::tie(UrKernel, DeviceImageImpl, EliminatedArgMask) = getCGKernelInfo(
2573+
CommandGroup, ContextImpl, DeviceImpl, FastKernelCacheValsToRelease);
25782574

25792575
// Build up the list of UR kernel handles that the UR command could be
25802576
// updated to use.
@@ -2588,7 +2584,7 @@ ur_result_t enqueueImpCommandBufferKernel(
25882584
ur_kernel_handle_t AltUrKernel = nullptr;
25892585
std::tie(AltUrKernel, std::ignore, std::ignore) =
25902586
getCGKernelInfo(*AltCGKernel.get(), ContextImpl, DeviceImpl,
2591-
UrKernelsToRelease, UrProgramsToRelease);
2587+
FastKernelCacheValsToRelease);
25922588
AltUrKernels.push_back(AltUrKernel);
25932589
}
25942590

@@ -2649,13 +2645,6 @@ ur_result_t enqueueImpCommandBufferKernel(
26492645
nullptr, OutSyncPoint, nullptr,
26502646
CommandBufferDesc.isUpdatable ? OutCommand : nullptr);
26512647

2652-
for (auto &Kernel : UrKernelsToRelease) {
2653-
Adapter->call<UrApiKind::urKernelRelease>(Kernel);
2654-
}
2655-
for (auto &Program : UrProgramsToRelease) {
2656-
Adapter->call<UrApiKind::urProgramRelease>(Program);
2657-
}
2658-
26592648
if (Res != UR_RESULT_SUCCESS) {
26602649
detail::enqueue_kernel_launch::handleErrorOrWarning(Res, DeviceImpl,
26612650
UrKernel, NDRDesc);
@@ -2687,6 +2676,7 @@ void enqueueImpKernel(
26872676

26882677
std::shared_ptr<kernel_impl> SyclKernelImpl;
26892678
std::shared_ptr<device_image_impl> DeviceImageImpl;
2679+
FastKernelCacheValPtr KernelCacheVal;
26902680

26912681
if (nullptr != MSyclKernel) {
26922682
assert(MSyclKernel->get_info<info::kernel::context>() ==
@@ -2714,10 +2704,12 @@ void enqueueImpKernel(
27142704
EliminatedArgMask = SyclKernelImpl->getKernelArgMask();
27152705
KernelMutex = SyclKernelImpl->getCacheMutex();
27162706
} else {
2717-
std::tie(Kernel, KernelMutex, EliminatedArgMask, Program) =
2718-
detail::ProgramManager::getInstance().getOrCreateKernel(
2719-
ContextImpl, DeviceImpl, KernelName, KernelNameBasedCachePtr,
2720-
NDRDesc);
2707+
KernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel(
2708+
ContextImpl, DeviceImpl, KernelName, KernelNameBasedCachePtr, NDRDesc);
2709+
Kernel = KernelCacheVal->MKernelHandle;
2710+
KernelMutex = KernelCacheVal->MMutex;
2711+
Program = KernelCacheVal->MProgramHandle;
2712+
EliminatedArgMask = KernelCacheVal->MKernelArgMask;
27212713
}
27222714

27232715
// We may need more events for the launch, so we make another reference.
@@ -2762,12 +2754,6 @@ void enqueueImpKernel(
27622754
KernelIsCooperative, KernelUsesClusterLaunch, WorkGroupMemorySize,
27632755
BinImage, KernelName, KernelFuncPtr, KernelNumArgs,
27642756
KernelParamDescGetter, KernelHasSpecialCaptures);
2765-
2766-
const AdapterPtr &Adapter = Queue->getAdapter();
2767-
if (!SyclKernelImpl && !MSyclKernel) {
2768-
Adapter->call<UrApiKind::urKernelRelease>(Kernel);
2769-
Adapter->call<UrApiKind::urProgramRelease>(Program);
2770-
}
27712757
}
27722758
if (UR_RESULT_SUCCESS != Error) {
27732759
// If we have got non-success error code, let's analyze it to emit nice

0 commit comments

Comments
 (0)