Skip to content

Commit 0ab45d9

Browse files
[SYCL] Implement loading SYCLBIN into kernel_bundle (#18949)
This commit implements the functionality for loading SYCLBIN files into kernel bundles. This is done by mimicking the structure of regular device binaries, then letting the existing functionality handle compiling and linking. This implements part of the sycl_ext_oneapi_syclbin extension. Note that parts of this implementation uses functionality copied from LLVMSupport and LLVMObject. Eventually they should be replaced in favor of using the LLVM libraries directly. --------- Signed-off-by: Larsen, Steffen <[email protected]>
1 parent b3b8073 commit 0ab45d9

40 files changed

+1925
-87
lines changed
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
//==---- syclbin_kernel_bundle.hpp - SYCLBIN-based kernel_bundle tooling ---==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <sycl/ext/oneapi/properties/properties.hpp>
12+
#include <sycl/kernel_bundle.hpp>
13+
14+
#include <fstream>
15+
#include <string>
16+
17+
#if __has_include(<filesystem>)
18+
#include <filesystem>
19+
#endif
20+
21+
#if __has_include(<span>)
22+
#include <span>
23+
#endif
24+
25+
namespace sycl {
26+
inline namespace _V1 {
27+
namespace ext::oneapi::experimental {
28+
29+
template <bundle_state State, typename PropertyListT = empty_properties_t>
30+
std::enable_if_t<State != bundle_state::ext_oneapi_source, kernel_bundle<State>>
31+
get_kernel_bundle(const context &Ctxt, const std::vector<device> &Devs,
32+
const sycl::span<char> &Bytes, PropertyListT = {}) {
33+
std::vector<device> UniqueDevices =
34+
sycl::detail::removeDuplicateDevices(Devs);
35+
36+
sycl::detail::KernelBundleImplPtr Impl =
37+
sycl::detail::get_kernel_bundle_impl(Ctxt, UniqueDevices, Bytes, State);
38+
return sycl::detail::createSyclObjFromImpl<kernel_bundle<State>>(Impl);
39+
}
40+
41+
#if __cpp_lib_span
42+
template <bundle_state State, typename PropertyListT = empty_properties_t>
43+
std::enable_if_t<State != bundle_state::ext_oneapi_source, kernel_bundle<State>>
44+
get_kernel_bundle(const context &Ctxt, const std::vector<device> &Devs,
45+
const std::span<char> &Bytes, PropertyListT Props = {}) {
46+
return experimental::get_kernel_bundle(
47+
Ctxt, Devs, sycl::span<char>(Bytes.data(), Bytes.size()), Props);
48+
}
49+
#endif
50+
51+
#if __cpp_lib_filesystem
52+
template <bundle_state State, typename PropertyListT = empty_properties_t>
53+
std::enable_if_t<State != bundle_state::ext_oneapi_source, kernel_bundle<State>>
54+
get_kernel_bundle(const context &Ctxt, const std::vector<device> &Devs,
55+
const std::filesystem::path &Filename,
56+
PropertyListT Props = {}) {
57+
std::vector<char> RawSYCLBINData;
58+
{
59+
std::ifstream FileStream{Filename, std::ios::binary};
60+
if (!FileStream.is_open())
61+
throw std::ios_base::failure("Failed to open SYCLBIN file: " +
62+
Filename.string());
63+
RawSYCLBINData =
64+
std::vector<char>{std::istreambuf_iterator<char>(FileStream),
65+
std::istreambuf_iterator<char>()};
66+
}
67+
return experimental::get_kernel_bundle<State>(
68+
Ctxt, Devs, sycl::span<char>{RawSYCLBINData}, Props);
69+
}
70+
71+
template <bundle_state State, typename PropertyListT = empty_properties_t>
72+
std::enable_if_t<State != bundle_state::ext_oneapi_source, kernel_bundle<State>>
73+
get_kernel_bundle(const context &Ctxt, const std::filesystem::path &Filename,
74+
PropertyListT Props = {}) {
75+
return experimental::get_kernel_bundle<State>(Ctxt, Ctxt.get_devices(),
76+
Filename, Props);
77+
}
78+
#endif
79+
80+
} // namespace ext::oneapi::experimental
81+
} // namespace _V1
82+
} // namespace sycl

sycl/include/sycl/kernel_bundle.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
#include <sycl/kernel.hpp> // for kernel, kernel_bundle
2020
#include <sycl/kernel_bundle_enums.hpp> // for bundle_state
2121
#include <sycl/property_list.hpp> // for property_list
22-
#include <ur_api.h> // for ur_native_handle_t
22+
#include <sycl/sycl_span.hpp>
23+
#include <ur_api.h>
2324

2425
#include <sycl/ext/oneapi/experimental/free_function_traits.hpp>
2526
#include <sycl/ext/oneapi/properties/properties.hpp> // PropertyT
@@ -639,6 +640,10 @@ __SYCL_EXPORT detail::KernelBundleImplPtr
639640
get_kernel_bundle_impl(const context &Ctx, const std::vector<device> &Devs,
640641
bundle_state State);
641642

643+
__SYCL_EXPORT detail::KernelBundleImplPtr
644+
get_kernel_bundle_impl(const context &Ctx, const std::vector<device> &Devs,
645+
const sycl::span<char> &Bytes, bundle_state State);
646+
642647
__SYCL_EXPORT const std::vector<device>
643648
removeDuplicateDevices(const std::vector<device> &Devs);
644649

sycl/include/sycl/sycl.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@
105105
#include <sycl/ext/oneapi/experimental/raw_kernel_arg.hpp>
106106
#include <sycl/ext/oneapi/experimental/reduction_properties.hpp>
107107
#include <sycl/ext/oneapi/experimental/root_group.hpp>
108+
#include <sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp>
108109
#include <sycl/ext/oneapi/experimental/tangle_group.hpp>
109110
#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
110111
#include <sycl/ext/oneapi/filter_selector.hpp>

sycl/source/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ set(SYCL_COMMON_SOURCES
301301
"detail/reduction.cpp"
302302
"detail/sampler_impl.cpp"
303303
"detail/stream_impl.cpp"
304+
"detail/syclbin.cpp"
304305
"detail/scheduler/commands.cpp"
305306
"detail/scheduler/leaves_collection.cpp"
306307
"detail/scheduler/scheduler.cpp"

sycl/source/detail/base64.hpp

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
//===--- Base64.h - Base64 Encoder/Decoder ----------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// Adjusted copy of llvm/include/llvm/Support/Base64.h.
9+
// TODO: Remove once we can consistently link the SYCL runtime library with
10+
// LLVMSupport.
11+
12+
#pragma once
13+
14+
#include <cstdint>
15+
#include <memory>
16+
#include <string>
17+
#include <vector>
18+
19+
namespace sycl {
20+
inline namespace _V1 {
21+
namespace detail {
22+
23+
class Base64 {
24+
private:
25+
// Decode a single character.
26+
static inline int decode(char Ch) {
27+
if (Ch >= 'A' && Ch <= 'Z') // 0..25
28+
return Ch - 'A';
29+
else if (Ch >= 'a' && Ch <= 'z') // 26..51
30+
return Ch - 'a' + 26;
31+
else if (Ch >= '0' && Ch <= '9') // 52..61
32+
return Ch - '0' + 52;
33+
else if (Ch == '+') // 62
34+
return 62;
35+
else if (Ch == '/') // 63
36+
return 63;
37+
return -1;
38+
}
39+
40+
// Decode a quadruple of characters.
41+
static inline void decode4(const char *Src, byte *Dst) {
42+
int BadCh = -1;
43+
44+
for (auto I = 0; I < 4; ++I) {
45+
char Ch = Src[I];
46+
int Byte = decode(Ch);
47+
48+
if (Byte < 0) {
49+
BadCh = Ch;
50+
break;
51+
}
52+
Dst[I] = (byte)Byte;
53+
}
54+
if (BadCh != -1)
55+
throw sycl::exception(make_error_code(errc::invalid),
56+
"Invalid char in base 64 encoding.");
57+
}
58+
59+
public:
60+
using byte = uint8_t;
61+
62+
// Get the size of the encoded byte sequence of given size.
63+
static size_t getDecodedSize(size_t SrcSize) { return (SrcSize * 3 + 3) / 4; }
64+
65+
// Decode a sequence of given size into a pre-allocated memory.
66+
// Returns the number of bytes in the decoded result or 0 in case of error.
67+
static size_t decode(const char *Src, byte *Dst, size_t SrcSize) {
68+
size_t SrcOff = 0;
69+
size_t DstOff = 0;
70+
71+
// decode full quads
72+
for (size_t Qch = 0; Qch < SrcSize / 4; ++Qch, SrcOff += 4, DstOff += 3) {
73+
byte Ch[4] = {0, 0, 0, 0};
74+
decode4(Src + SrcOff, Ch);
75+
76+
// each quad of chars produces three bytes of output
77+
Dst[DstOff + 0] = Ch[0] | (Ch[1] << 6);
78+
Dst[DstOff + 1] = (Ch[1] >> 2) | (Ch[2] << 4);
79+
Dst[DstOff + 2] = (Ch[2] >> 4) | (Ch[3] << 2);
80+
}
81+
auto RemChars = SrcSize - SrcOff;
82+
83+
if (RemChars == 0)
84+
return DstOff;
85+
// decode the remainder; variants:
86+
// 2 chars remain - produces single byte
87+
// 3 chars remain - produces two bytes
88+
89+
if (RemChars != 2 && RemChars != 3)
90+
throw sycl::exception(make_error_code(errc::invalid),
91+
"Invalid encoded sequence length.");
92+
93+
int Ch0 = decode(Src[SrcOff++]);
94+
int Ch1 = decode(Src[SrcOff++]);
95+
int Ch2 = RemChars == 3 ? decode(Src[SrcOff]) : 0;
96+
97+
if (Ch0 < 0 || Ch1 < 0 || Ch2 < 0)
98+
throw sycl::exception(
99+
make_error_code(errc::invalid),
100+
"Invalid characters in the encoded sequence remainder.");
101+
Dst[DstOff++] = Ch0 | (Ch1 << 6);
102+
103+
if (RemChars == 3)
104+
Dst[DstOff++] = (Ch1 >> 2) | (Ch2 << 4);
105+
return DstOff;
106+
}
107+
108+
// Allocate minimum required amount of memory and decode a sequence of given
109+
// size into it.
110+
// Returns the decoded result. The size can be obtained via getDecodedSize.
111+
static std::unique_ptr<byte[]> decode(const char *Src, size_t SrcSize) {
112+
size_t DstSize = getDecodedSize(SrcSize);
113+
std::unique_ptr<byte[]> Dst(new byte[DstSize]);
114+
decode(Src, Dst.get(), SrcSize);
115+
return Dst;
116+
}
117+
};
118+
119+
} // namespace detail
120+
} // namespace _V1
121+
} // namespace sycl

sycl/source/detail/compiler.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@
5757
#define __SYCL_PROPERTY_SET_SYCL_MISC_PROP "SYCL/misc properties"
5858
/// PropertySetRegistry::SYCL_ASSERT_USED defined in PropertySetIO.h
5959
#define __SYCL_PROPERTY_SET_SYCL_ASSERT_USED "SYCL/assert used"
60+
/// PropertySetRegistry::SYCL_KERNEL_NAMES defined in PropertySetIO.h
61+
#define __SYCL_PROPERTY_SET_SYCL_KERNEL_NAMES "SYCL/kernel names"
6062
/// PropertySetRegistry::SYCL_EXPORTED_SYMBOLS defined in PropertySetIO.h
6163
#define __SYCL_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS "SYCL/exported symbols"
6264
/// PropertySetRegistry::SYCL_IMPORTED_SYMBOLS defined in PropertySetIO.h

sycl/source/detail/device_binary_image.cpp

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ RTDeviceBinaryImage::getProperty(const char *PropName) const {
162162
return *It;
163163
}
164164

165-
void RTDeviceBinaryImage::init(sycl_device_binary Bin) {
165+
RTDeviceBinaryImage::RTDeviceBinaryImage(sycl_device_binary Bin) {
166166
ImageId = ImageCounter++;
167167

168168
// If there was no binary, we let the owner handle initialization as they see
@@ -199,6 +199,7 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) {
199199
ProgramMetadataUR.push_back(
200200
ur::mapDeviceBinaryPropertyToProgramMetadata(Prop));
201201
}
202+
KernelNames.init(Bin, __SYCL_PROPERTY_SET_SYCL_KERNEL_NAMES);
202203
ExportedSymbols.init(Bin, __SYCL_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS);
203204
ImportedSymbols.init(Bin, __SYCL_PROPERTY_SET_SYCL_IMPORTED_SYMBOLS);
204205
DeviceGlobals.init(Bin, __SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS);
@@ -211,7 +212,8 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) {
211212

212213
std::atomic<uintptr_t> RTDeviceBinaryImage::ImageCounter = 1;
213214

214-
DynRTDeviceBinaryImage::DynRTDeviceBinaryImage() : RTDeviceBinaryImage() {
215+
DynRTDeviceBinaryImage::DynRTDeviceBinaryImage()
216+
: RTDeviceBinaryImage(nullptr) {
215217
Bin = new sycl_device_binary_struct();
216218
Bin->Version = SYCL_DEVICE_BINARY_VERSION;
217219
Bin->Kind = SYCL_DEVICE_BINARY_OFFLOAD_KIND_SYCL;
@@ -227,12 +229,11 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage() : RTDeviceBinaryImage() {
227229
Bin->DeviceTargetSpec = __SYCL_DEVICE_BINARY_TARGET_UNKNOWN;
228230
}
229231

230-
DynRTDeviceBinaryImage::DynRTDeviceBinaryImage(
231-
std::unique_ptr<char[], std::function<void(void *)>> &&DataPtr,
232-
size_t DataSize)
233-
: DynRTDeviceBinaryImage() {
234-
Data = std::move(DataPtr);
235-
Bin->BinaryStart = reinterpret_cast<unsigned char *>(Data.get());
232+
std::unique_ptr<sycl_device_binary_struct> CreateDefaultDynBinary(
233+
const std::unique_ptr<char[], std::function<void(void *)>> &DataPtr,
234+
size_t DataSize) {
235+
auto Bin = std::make_unique<sycl_device_binary_struct>();
236+
Bin->BinaryStart = reinterpret_cast<unsigned char *>(DataPtr.get());
236237
Bin->BinaryEnd = Bin->BinaryStart + DataSize;
237238
Bin->Format = ur::getBinaryImageFormat(Bin->BinaryStart, DataSize);
238239
switch (Bin->Format) {
@@ -242,9 +243,15 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage(
242243
default:
243244
Bin->DeviceTargetSpec = __SYCL_DEVICE_BINARY_TARGET_UNKNOWN;
244245
}
245-
init(Bin);
246+
return Bin;
246247
}
247248

249+
DynRTDeviceBinaryImage::DynRTDeviceBinaryImage(
250+
std::unique_ptr<char[], std::function<void(void *)>> &&DataPtr,
251+
size_t DataSize)
252+
: RTDeviceBinaryImage(CreateDefaultDynBinary(DataPtr, DataSize).release()),
253+
Data{std::move(DataPtr)} {}
254+
248255
DynRTDeviceBinaryImage::~DynRTDeviceBinaryImage() {
249256
delete Bin;
250257
Bin = nullptr;
@@ -479,8 +486,6 @@ static void copyProperty(sycl_device_binary_property &NextFreeProperty,
479486
DynRTDeviceBinaryImage::DynRTDeviceBinaryImage(
480487
const std::vector<const RTDeviceBinaryImage *> &Imgs)
481488
: DynRTDeviceBinaryImage() {
482-
init(nullptr);
483-
484489
// Naive merges.
485490
auto MergedSpecConstants =
486491
naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) {
@@ -510,6 +515,10 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage(
510515
naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) {
511516
return Img.getImplicitLocalArg();
512517
});
518+
auto MergedKernelNames =
519+
naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) {
520+
return Img.getKernelNames();
521+
});
513522
auto MergedExportedSymbols =
514523
naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) {
515524
return Img.getExportedSymbols();
@@ -519,12 +528,13 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage(
519528
return Img.getRegisteredKernels();
520529
});
521530

522-
std::array<const std::vector<sycl_device_binary_property> *, 10> MergedVecs{
531+
std::array<const std::vector<sycl_device_binary_property> *, 11> MergedVecs{
523532
&MergedSpecConstants, &MergedSpecConstantsDefaultValues,
524533
&MergedKernelParamOptInfo, &MergedAssertUsed,
525534
&MergedDeviceGlobals, &MergedHostPipes,
526535
&MergedVirtualFunctions, &MergedImplicitLocalArg,
527-
&MergedExportedSymbols, &MergedRegisteredKernels};
536+
&MergedKernelNames, &MergedExportedSymbols,
537+
&MergedRegisteredKernels};
528538

529539
// Exclusive merges.
530540
auto MergedDeviceLibReqMask =
@@ -648,6 +658,7 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage(
648658
CopyPropertiesVec(MergedHostPipes, HostPipes);
649659
CopyPropertiesVec(MergedVirtualFunctions, VirtualFunctions);
650660
CopyPropertiesVec(MergedImplicitLocalArg, ImplicitLocalArg);
661+
CopyPropertiesVec(MergedKernelNames, KernelNames);
651662
CopyPropertiesVec(MergedExportedSymbols, ExportedSymbols);
652663
CopyPropertiesVec(MergedRegisteredKernels, RegisteredKernels);
653664

@@ -675,18 +686,11 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage(
675686
#ifdef SYCL_RT_ZSTD_AVAILABLE
676687
CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage(
677688
sycl_device_binary CompressedBin)
678-
: RTDeviceBinaryImage() {
679-
680-
// 'CompressedBin' is part of the executable image loaded into memory
681-
// which can't be modified easily. So, we need to make a copy of it.
682-
Bin = new sycl_device_binary_struct(*CompressedBin);
683-
689+
: RTDeviceBinaryImage(new sycl_device_binary_struct(*CompressedBin)) {
684690
// Get the decompressed size of the binary image.
685691
m_ImageSize = ZSTDCompressor::GetDecompressedSize(
686692
reinterpret_cast<const char *>(Bin->BinaryStart),
687693
static_cast<size_t>(Bin->BinaryEnd - Bin->BinaryStart));
688-
689-
init(Bin);
690694
}
691695

692696
void CompressedRTDeviceBinaryImage::Decompress() {

0 commit comments

Comments
 (0)