Skip to content

Commit 4817b3f

Browse files
authored
[SYCL] Improve get_kernel_bundle performance (#5496)
This patch improves the performance of the get_kernel_bundle overload that takes a list of kernel_ids. There are three main improvements: 1. To speed up the search for the binary image that contains a given kernel id, a dedicated map is prebuilt during initial image registration. 2. To avoid constructing the vector of kernel_ids for a device image on every request, a dedicated map is prebuilt during initial image registration. 3. The vector of kernel_ids for a device image is now shared (held through a shared pointer) to avoid unnecessary copies.
1 parent 13a7455 commit 4817b3f

File tree

7 files changed

+147
-140
lines changed

7 files changed

+147
-140
lines changed

sycl/source/backend.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ make_kernel_bundle(pi_native_handle NativeHandle, const context &TargetContext,
187187
// this by pre-building the device image and extracting kernel info. We can't
188188
// do the same to user images, since they may contain references to undefined
189189
// symbols (e.g. when kernel_bundle is supposed to be joined with another).
190-
std::vector<kernel_id> KernelIDs{};
190+
auto KernelIDs = std::make_shared<std::vector<kernel_id>>();
191191
auto DevImgImpl = std::make_shared<device_image_impl>(
192192
nullptr, TargetContext, Devices, State, KernelIDs, PiProgram);
193193
device_image_plain DevImg{DevImgImpl};

sycl/source/detail/device_image_impl.hpp

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ __SYCL_INLINE_NAMESPACE(cl) {
3232
namespace sycl {
3333
namespace detail {
3434

35+
// Comparison functor for SYCL objects of type T: LHS is ordered before RHS
// when getSyclObjImpl(LHS) < getSyclObjImpl(RHS).
// device_image_impl::has_kernel passes LessByHash<kernel_id> to
// std::binary_search, so a kernel_id range searched that way has to be sorted
// with this same comparator.
// NOTE(review): despite the name, nothing is hashed in this body; the order
// comes from operator< on whatever getSyclObjImpl returns (presumably the
// object's impl handle - confirm).
template <class T> struct LessByHash {
36+
bool operator()(const T &LHS, const T &RHS) const {
37+
return getSyclObjImpl(LHS) < getSyclObjImpl(RHS);
38+
}
39+
};
40+
3541
// The class is impl counterpart for sycl::device_image
3642
// It can represent a program in different states, kernel_id's it has and state
3743
// of specialization constants for it
@@ -51,7 +57,8 @@ class device_image_impl {
5157

5258
device_image_impl(const RTDeviceBinaryImage *BinImage, context Context,
5359
std::vector<device> Devices, bundle_state State,
54-
std::vector<kernel_id> KernelIDs, RT::PiProgram Program)
60+
std::shared_ptr<std::vector<kernel_id>> KernelIDs,
61+
RT::PiProgram Program)
5562
: MBinImage(BinImage), MContext(std::move(Context)),
5663
MDevices(std::move(Devices)), MState(State), MProgram(Program),
5764
MKernelIDs(std::move(KernelIDs)) {
@@ -60,17 +67,17 @@ class device_image_impl {
6067

6168
device_image_impl(const RTDeviceBinaryImage *BinImage, context Context,
6269
std::vector<device> Devices, bundle_state State,
63-
std::vector<kernel_id> KernelIDs, RT::PiProgram Program,
64-
const SpecConstMapT &SpecConstMap,
70+
std::shared_ptr<std::vector<kernel_id>> KernelIDs,
71+
RT::PiProgram Program, const SpecConstMapT &SpecConstMap,
6572
const std::vector<unsigned char> &SpecConstsBlob)
6673
: MBinImage(BinImage), MContext(std::move(Context)),
6774
MDevices(std::move(Devices)), MState(State), MProgram(Program),
6875
MKernelIDs(std::move(KernelIDs)), MSpecConstsBlob(SpecConstsBlob),
6976
MSpecConstSymMap(SpecConstMap) {}
7077

7178
bool has_kernel(const kernel_id &KernelIDCand) const noexcept {
72-
return std::binary_search(MKernelIDs.begin(), MKernelIDs.end(),
73-
KernelIDCand, LessByNameComp{});
79+
return std::binary_search(MKernelIDs->begin(), MKernelIDs->end(),
80+
KernelIDCand, LessByHash<kernel_id>{});
7481
}
7582

7683
bool has_kernel(const kernel_id &KernelIDCand,
@@ -83,7 +90,7 @@ class device_image_impl {
8390
}
8491

8592
const std::vector<kernel_id> &get_kernel_ids() const noexcept {
86-
return MKernelIDs;
93+
return *MKernelIDs;
8794
}
8895

8996
bool has_specialization_constants() const noexcept {
@@ -176,7 +183,9 @@ class device_image_impl {
176183

177184
const context &get_context() const noexcept { return MContext; }
178185

179-
std::vector<kernel_id> &get_kernel_ids_ref() noexcept { return MKernelIDs; }
186+
std::shared_ptr<std::vector<kernel_id>> &get_kernel_ids_ptr() noexcept {
187+
return MKernelIDs;
188+
}
180189

181190
std::vector<unsigned char> &get_spec_const_blob_ref() noexcept {
182191
return MSpecConstsBlob;
@@ -312,7 +321,7 @@ class device_image_impl {
312321
RT::PiProgram MProgram = nullptr;
313322
// List of kernel ids available in this image, elements should be sorted
314323
// according to LessByHash<kernel_id> (the comparator has_kernel uses for its binary search)
315-
std::vector<kernel_id> MKernelIDs;
324+
std::shared_ptr<std::vector<kernel_id>> MKernelIDs;
316325

317326
// A mutex for synchronizing access to spec constants blob. Mutable because
318327
// needs to be locked in the const method for getting spec constant value.

sycl/source/detail/kernel_bundle_impl.hpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,6 @@ __SYCL_INLINE_NAMESPACE(cl) {
2828
namespace sycl {
2929
namespace detail {
3030

31-
template <class T> struct LessByHash {
32-
bool operator()(const T &LHS, const T &RHS) const {
33-
return getSyclObjImpl(LHS) < getSyclObjImpl(RHS);
34-
}
35-
};
36-
3731
static bool checkAllDevicesAreInContext(const std::vector<device> &Devices,
3832
const context &Context) {
3933
const std::vector<device> &ContextDevices = Context.get_devices();

0 commit comments

Comments
 (0)