Skip to content

[SYCL] Add implementation of kernel_bundle. Part 2 #3287

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 15 additions & 11 deletions sycl/include/CL/sycl/kernel_bundle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,15 @@ class __SYCL_EXPORT kernel_bundle_plain {

// Sets the specialization constant with specified ID to the value pointed by
// Value + ValueSize
void set_specialization_constant(unsigned int SpecID, const void *Value,
size_t ValueSize);
void set_specialization_constant_raw_value(unsigned int SpecID,
const void *Value,
size_t ValueSize);

// \returns pointer to the value of the specialization constant with specified
// ID
const void *get_specialization_constant(unsigned int SpecID) const;
void get_specialization_constant_raw_value(unsigned int SpecID,
void *ValueRet,
size_t ValueSize) const;

// \returns a kernel object which represents the kernel identified by
// kernel_id passed
Expand Down Expand Up @@ -274,8 +277,8 @@ class kernel_bundle : public detail::kernel_bundle_plain {
typename std::remove_reference_t<decltype(SpecName)>::type Value) {
assert(false && "set_specialization_constant is not implemented yet");
unsigned int SpecID = 0; // TODO: Convert SpecName to a numeric ID
return kernel_bundle_plain::set_specialization_constant(SpecID, &Value,
sizeof(Value));
return kernel_bundle_plain::set_specialization_constant_raw_value(
SpecID, &Value, sizeof(Value));
}

/// The value of the specialization constant whose address is SpecName for
Expand All @@ -285,9 +288,10 @@ class kernel_bundle : public detail::kernel_bundle_plain {
get_specialization_constant() const {
assert(false && "get_specialization_constant is not implemented yet");
unsigned int SpecID = 0; // TODO: Convert SpecName to a numeric ID
typename std::remove_reference_t<decltype(SpecName)>::type *ValuePtr =
kernel_bundle_plain::get_specialization_constant(SpecID);
return *ValuePtr;
typename std::remove_reference_t<decltype(SpecName)>::type Value;
kernel_bundle_plain::get_specialization_constant_raw_value(
SpecID, (void *)&Value, sizeof(Value));
return Value;
}
#endif

Expand Down Expand Up @@ -334,7 +338,7 @@ namespace detail {

// Internal non-template versions of get_kernel_bundle API which is used by
// public onces
detail::KernelBundleImplPtr
__SYCL_EXPORT detail::KernelBundleImplPtr
get_kernel_bundle_impl(const context &Ctx, const std::vector<device> &Devs,
bundle_state State);
} // namespace detail
Expand All @@ -360,7 +364,7 @@ namespace detail {

// Internal non-template versions of get_kernel_bundle API which is used by
// public onces
detail::KernelBundleImplPtr
__SYCL_EXPORT detail::KernelBundleImplPtr
get_kernel_bundle_impl(const context &Ctx, const std::vector<device> &Devs,
const std::vector<kernel_id> &KernelIDs,
bundle_state State);
Expand Down Expand Up @@ -409,7 +413,7 @@ using DevImgSelectorImpl =

// Internal non-template versions of get_kernel_bundle API which is used by
// public onces
detail::KernelBundleImplPtr
__SYCL_EXPORT detail::KernelBundleImplPtr
get_kernel_bundle_impl(const context &Ctx, const std::vector<device> &Devs,
bundle_state State, const DevImgSelectorImpl &Selector);
} // namespace detail
Expand Down
1 change: 1 addition & 0 deletions sycl/source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ set(SYCL_SOURCES
"interop_handle.cpp"
"interop_handler.cpp"
"kernel.cpp"
"kernel_bundle.cpp"
"platform.cpp"
"program.cpp"
"queue.cpp"
Expand Down
160 changes: 160 additions & 0 deletions sycl/source/detail/device_image_impl.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
//==------- device_image_impl.hpp - SYCL device_image_impl -----------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <CL/sycl/context.hpp>
#include <CL/sycl/detail/common.hpp>
#include <CL/sycl/detail/pi.h>
#include <CL/sycl/device.hpp>
#include <CL/sycl/kernel_bundle.hpp>
#include <detail/device_impl.hpp>
#include <detail/kernel_id_impl.hpp>
#include <detail/program_manager/program_manager.hpp>

#include <algorithm>
#include <cassert>
#include <cstring>
#include <memory>
#include <mutex>
#include <vector>

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace detail {

// Used for sorting vector of kernel_id's
struct LessByNameComp {
bool operator()(const sycl::kernel_id &LHS, const sycl::kernel_id &RHS) {
return std::strcmp(LHS.get_name(), RHS.get_name()) < 0;
}
};

// The class is impl counterpart for sycl::device_image
// It can represent a program in different states, kernel_id's it has and state
// of specialization constants for it
class device_image_impl {
public:
device_image_impl(RTDeviceBinaryImage *BinImage, context Context,
std::vector<device> Devices, bundle_state State)
: MBinImage(BinImage), MContext(std::move(Context)),
MDevices(std::move(Devices)), MState(State) {

// Collect kernel names for the image
pi_device_binary DevBin =
const_cast<pi_device_binary>(&BinImage->getRawData());
for (_pi_offload_entry EntriesIt = DevBin->EntriesBegin;
EntriesIt != DevBin->EntriesEnd; ++EntriesIt) {

std::shared_ptr<detail::kernel_id_impl> KernleIDImpl =
std::make_shared<detail::kernel_id_impl>(EntriesIt->name);

sycl::kernel_id KernelID =
detail::createSyclObjFromImpl<sycl::kernel_id>(KernleIDImpl);

// Insert new element keeping MKernelIDs sorted.
auto It = std::lower_bound(MKernelIDs.begin(), MKernelIDs.end(), KernelID,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: I think pushing all the elements into the vector and then sorting it once should be more efficient than inserting each one while keeping the vector sorted.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do measurements.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is that it might depends a lot on the real number of kernels.
Perhaps with SYCL generated by ML frameworks could have millions of kernels... ;-)
But for "normal" code and a few kernels?
Anyway, the std::sort path should be fast enough too for just a few kernels...

LessByNameComp{});
MKernelIDs.insert(It, std::move(KernelID));
}
}

bool has_kernel(const kernel_id &KernelIDCand) const noexcept {
return std::binary_search(MKernelIDs.begin(), MKernelIDs.end(),
KernelIDCand, LessByNameComp{});
}

bool has_kernel(const kernel_id &KernelIDCand,
const device &DeviceCand) const noexcept {
for (const device &Device : MDevices)
if (Device == DeviceCand)
return has_kernel(KernelIDCand);

return false;
}

const std::vector<kernel_id> &get_kernel_ids() const noexcept {
return MKernelIDs;
}

bool has_specialization_constants() const noexcept {
return !MSpecConstsBlob.empty();
}

bool all_specialization_constant_native() const noexcept {
assert(false && "Not implemented");
return false;
}

// The struct maps specialization ID to offset in the binary blob where value
// for this spec const should be.
struct SpecConstIDOffset {
unsigned int ID = 0;
unsigned int Offset = 0;
};

bool has_specialization_constant(unsigned int SpecID) const noexcept {
return std::any_of(
MSpecConstOffsets.begin(), MSpecConstOffsets.end(),
[SpecID](const SpecConstIDOffset &Pair) { return Pair.ID == SpecID; });
}

void set_specialization_constant_raw_value(unsigned int SpecID,
const void *Value,
size_t ValueSize) noexcept {
for (const SpecConstIDOffset &Pair : MSpecConstOffsets)
if (Pair.ID == SpecID) {
// Lock the mutex to prevent when one thread in the middle of writing a
// new value while another thread is reading the value to pass it to
// JIT compiler.
const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
std::memcpy(MSpecConstsBlob.data() + Pair.Offset, Value, ValueSize);
return;
}
}

void get_specialization_constant_raw_value(unsigned int SpecID,
void *ValueRet,
size_t ValueSize) const noexcept {
for (const SpecConstIDOffset &Pair : MSpecConstOffsets)
if (Pair.ID == SpecID) {
// Lock the mutex to prevent when one thread in the middle of writing a
// new value while another thread is reading the value to pass it to
// JIT compiler.
const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
std::memcpy(ValueRet, MSpecConstsBlob.data() + Pair.Offset, ValueSize);
return;
}
}

bundle_state get_state() const noexcept { return MState; }

void set_state(bundle_state NewState) noexcept { MState = NewState; }

private:
RTDeviceBinaryImage *MBinImage = nullptr;
context MContext;
std::vector<device> MDevices;
bundle_state MState;
// List of kernel ids available in this image, elements should be sorted
// according to LessByNameComp
std::vector<kernel_id> MKernelIDs;

// A mutex for sycnhronizing access to spec constants blob. Mutable because
// needs to be locked in the const method for getting spec constant value.
mutable std::mutex MSpecConstAccessMtx;
// Binary blob which can have values of all specialization constants in the
// image
std::vector<unsigned char> MSpecConstsBlob;
// Contains list of spec ID + their offsets in the MSpecConstsBlob
std::vector<SpecConstIDOffset> MSpecConstOffsets;
};

} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
Loading