Skip to content

Commit a003d29

Browse files
[SYCL] Optimize kernel name based cache lookup (#18081)
Remove the kernel name string related operations during the cache lookup by taking advantage of the kernel name type uniqueness. --------- Signed-off-by: Sergei Vinogradov <[email protected]> Co-authored-by: Sergei Vinogradov <[email protected]>
1 parent 7495bcf commit a003d29

25 files changed

+347
-117
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//==--------------------- kernel_name_based_cache.hpp ----------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
#pragma once
9+
10+
#include <sycl/detail/export.hpp>
11+
12+
namespace sycl {
13+
inline namespace _V1 {
14+
namespace detail {
15+
16+
struct KernelNameBasedCacheT;
17+
__SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache();
18+
19+
// Retrieves a cache pointer unique to a kernel name type that can be used to
20+
// avoid kernel name based lookup in the runtime.
21+
template <typename KernelName>
22+
KernelNameBasedCacheT *getKernelNameBasedCache() {
23+
static KernelNameBasedCacheT *Instance = createKernelNameBasedCache();
24+
return Instance;
25+
}
26+
27+
} // namespace detail
28+
} // namespace _V1
29+
} // namespace sycl

sycl/include/sycl/handler.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <sycl/detail/id_queries_fit_in_int.hpp>
1919
#include <sycl/detail/impl_utils.hpp>
2020
#include <sycl/detail/kernel_desc.hpp>
21+
#include <sycl/detail/kernel_name_based_cache.hpp>
2122
#include <sycl/detail/kernel_name_str_t.hpp>
2223
#include <sycl/detail/reduction_forward.hpp>
2324
#include <sycl/detail/string.hpp>
@@ -813,6 +814,7 @@ class __SYCL_EXPORT handler {
813814
// later during finalize.
814815
setArgsToAssociatedAccessors();
815816
}
817+
setKernelNameBasedCachePtr(detail::getKernelNameBasedCache<KernelName>());
816818

817819
// If the kernel lambda is callable with a kernel_handler argument, manifest
818820
// the associated kernel handler.
@@ -3789,6 +3791,9 @@ class __SYCL_EXPORT handler {
37893791
sycl::handler &h, size_t size,
37903792
const ext::oneapi::experimental::memory_pool &pool);
37913793

3794+
void setKernelNameBasedCachePtr(
3795+
detail::KernelNameBasedCacheT *KernelNameBasedCachePtr);
3796+
37923797
protected:
37933798
/// Registers event dependencies in this command group.
37943799
void depends_on(const detail::EventImplPtr &Event);

sycl/source/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ set(SYCL_COMMON_SOURCES
273273
"detail/kernel_compiler/kernel_compiler_opencl.cpp"
274274
"detail/kernel_compiler/kernel_compiler_sycl.cpp"
275275
"detail/kernel_impl.cpp"
276+
"detail/kernel_name_based_cache.cpp"
276277
"detail/kernel_program_cache.cpp"
277278
"detail/memory_manager.cpp"
278279
"detail/pipes.cpp"

sycl/source/detail/cg.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ class CGExecKernel : public CG {
257257
std::shared_ptr<detail::kernel_bundle_impl> MKernelBundle;
258258
std::vector<ArgDesc> MArgs;
259259
KernelNameStrT MKernelName;
260+
KernelNameBasedCacheT *MKernelNameBasedCachePtr;
260261
std::vector<std::shared_ptr<detail::stream_impl>> MStreams;
261262
std::vector<std::shared_ptr<const void>> MAuxiliaryResources;
262263
/// Used to implement ext_oneapi_graph dynamic_command_group. Stores the list
@@ -272,6 +273,7 @@ class CGExecKernel : public CG {
272273
std::shared_ptr<detail::kernel_bundle_impl> KernelBundle,
273274
CG::StorageInitHelper CGData, std::vector<ArgDesc> Args,
274275
KernelNameStrT KernelName,
276+
KernelNameBasedCacheT *KernelNameBasedCachePtr,
275277
std::vector<std::shared_ptr<detail::stream_impl>> Streams,
276278
std::vector<std::shared_ptr<const void>> AuxiliaryResources,
277279
CGType Type, ur_kernel_cache_config_t KernelCacheConfig,
@@ -281,7 +283,9 @@ class CGExecKernel : public CG {
281283
MNDRDesc(std::move(NDRDesc)), MHostKernel(std::move(HKernel)),
282284
MSyclKernel(std::move(SyclKernel)),
283285
MKernelBundle(std::move(KernelBundle)), MArgs(std::move(Args)),
284-
MKernelName(std::move(KernelName)), MStreams(std::move(Streams)),
286+
MKernelName(std::move(KernelName)),
287+
MKernelNameBasedCachePtr(KernelNameBasedCachePtr),
288+
MStreams(std::move(Streams)),
285289
MAuxiliaryResources(std::move(AuxiliaryResources)),
286290
MAlternativeKernels{}, MKernelCacheConfig(std::move(KernelCacheConfig)),
287291
MKernelIsCooperative(KernelIsCooperative),

sycl/source/detail/global_handler.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <detail/adapter.hpp>
1515
#include <detail/config.hpp>
1616
#include <detail/global_handler.hpp>
17+
#include <detail/kernel_name_based_cache_t.hpp>
1718
#include <detail/platform_impl.hpp>
1819
#include <detail/program_manager/program_manager.hpp>
1920
#include <detail/scheduler/scheduler.hpp>
@@ -252,6 +253,15 @@ ThreadPool &GlobalHandler::getHostTaskThreadPool() {
252253
return TP;
253254
}
254255

256+
KernelNameBasedCacheT *GlobalHandler::createKernelNameBasedCache() {
257+
static std::vector<std::unique_ptr<KernelNameBasedCacheT>>
258+
&KernelNameBasedCaches = getOrCreate(MKernelNameBasedCaches);
259+
LockGuard LG{MKernelNameBasedCaches.Lock};
260+
return KernelNameBasedCaches
261+
.emplace_back(std::make_unique<KernelNameBasedCacheT>())
262+
.get();
263+
}
264+
255265
void GlobalHandler::releaseDefaultContexts() {
256266
// Release shared-pointers to SYCL objects.
257267
// Note that on Windows the destruction of the default context

sycl/source/detail/global_handler.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class Adapter;
2626
class ods_target_list;
2727
class XPTIRegistry;
2828
class ThreadPool;
29+
struct KernelNameBasedCacheT;
2930

3031
using ContextImplPtr = std::shared_ptr<context_impl>;
3132
using AdapterPtr = std::shared_ptr<Adapter>;
@@ -73,7 +74,7 @@ class GlobalHandler {
7374
ods_target_list &getOneapiDeviceSelectorTargets(const std::string &InitValue);
7475
XPTIRegistry &getXPTIRegistry();
7576
ThreadPool &getHostTaskThreadPool();
76-
77+
KernelNameBasedCacheT *createKernelNameBasedCache();
7778
static void registerStaticVarShutdownHandler();
7879

7980
bool isOkToDefer() const;
@@ -129,6 +130,8 @@ class GlobalHandler {
129130
InstWithLock<XPTIRegistry> MXPTIRegistry;
130131
// Thread pool for host task and event callbacks execution
131132
InstWithLock<ThreadPool> MHostTaskThreadPool;
133+
InstWithLock<std::vector<std::unique_ptr<KernelNameBasedCacheT>>>
134+
MKernelNameBasedCaches;
132135
};
133136
} // namespace detail
134137
} // namespace _V1

sycl/source/detail/graph_impl.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -813,8 +813,8 @@ exec_graph_impl::enqueueNodeDirect(sycl::context Ctx,
813813
CGExec->MLine, CGExec->MColumn);
814814
std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData(
815815
StreamID, CGExec->MSyclKernel, CodeLoc, CGExec->MIsTopCodeLoc,
816-
CGExec->MKernelName.data(), nullptr, CGExec->MNDRDesc,
817-
CGExec->MKernelBundle, CGExec->MArgs);
816+
CGExec->MKernelName.data(), CGExec->MKernelNameBasedCachePtr, nullptr,
817+
CGExec->MNDRDesc, CGExec->MKernelBundle, CGExec->MArgs);
818818
if (CmdTraceEvent)
819819
sycl::detail::emitInstrumentationGeneral(
820820
StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr);
@@ -1503,7 +1503,8 @@ void exec_graph_impl::populateURKernelUpdateStructs(
15031503
ur_program_handle_t UrProgram = nullptr;
15041504
std::tie(UrKernel, std::ignore, EliminatedArgMask, UrProgram) =
15051505
sycl::detail::ProgramManager::getInstance().getOrCreateKernel(
1506-
ContextImpl, DeviceImpl, ExecCG.MKernelName);
1506+
ContextImpl, DeviceImpl, ExecCG.MKernelName,
1507+
ExecCG.MKernelNameBasedCachePtr);
15071508
BundleObjs = std::make_pair(UrProgram, UrKernel);
15081509
}
15091510

sycl/source/detail/handler_impl.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,9 @@ class handler_impl {
205205
detail::kernel_param_desc_t (*MKernelParamDescGetter)(int) = nullptr;
206206
bool MKernelIsESIMD = false;
207207
bool MKernelHasSpecialCaptures = true;
208+
209+
// A pointer to a kernel name based cache retrieved on the application side.
210+
KernelNameBasedCacheT *MKernelNameBasedCachePtr;
208211
};
209212

210213
} // namespace detail
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//==--------------------- kernel_name_based_cache.cpp ----------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <detail/global_handler.hpp>
10+
#include <sycl/detail/kernel_name_based_cache.hpp>
11+
12+
namespace sycl {
13+
inline namespace _V1 {
14+
namespace detail {
15+
16+
KernelNameBasedCacheT *createKernelNameBasedCache() {
17+
return GlobalHandler::instance().createKernelNameBasedCache();
18+
}
19+
20+
} // namespace detail
21+
} // namespace _V1
22+
} // namespace sycl
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
//==-------------------- kernel_name_based_cache_t.hpp ---------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
#pragma once
9+
10+
#include <detail/kernel_arg_mask.hpp>
11+
#include <sycl/detail/spinlock.hpp>
12+
#include <sycl/detail/ur.hpp>
13+
14+
#include <mutex>
15+
16+
#include <boost/unordered/unordered_flat_map.hpp>
17+
18+
namespace sycl {
19+
inline namespace _V1 {
20+
namespace detail {
21+
using FastKernelCacheKeyT = std::pair<ur_device_handle_t, ur_context_handle_t>;
22+
using FastKernelCacheValT =
23+
std::tuple<ur_kernel_handle_t, std::mutex *, const KernelArgMask *,
24+
ur_program_handle_t>;
25+
using FastKernelSubcacheMapT =
26+
::boost::unordered_flat_map<FastKernelCacheKeyT, FastKernelCacheValT>;
27+
28+
using FastKernelSubcacheMutexT = SpinLock;
29+
using FastKernelSubcacheReadLockT = std::lock_guard<FastKernelSubcacheMutexT>;
30+
using FastKernelSubcacheWriteLockT = std::lock_guard<FastKernelSubcacheMutexT>;
31+
32+
struct FastKernelSubcacheT {
33+
FastKernelSubcacheMapT Map;
34+
FastKernelSubcacheMutexT Mutex;
35+
};
36+
37+
struct KernelNameBasedCacheT {
38+
FastKernelSubcacheT FastKernelSubcache;
39+
};
40+
41+
} // namespace detail
42+
} // namespace _V1
43+
} // namespace sycl

sycl/source/detail/kernel_program_cache.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ namespace detail {
1616
const AdapterPtr &KernelProgramCache::getAdapter() {
1717
return MParentContext->getAdapter();
1818
}
19+
20+
ur_context_handle_t KernelProgramCache::getURContext() const {
21+
return MParentContext->getHandleRef();
22+
}
23+
1924
} // namespace detail
2025
} // namespace _V1
2126
} // namespace sycl

0 commit comments

Comments
 (0)