Skip to content

Commit 9095749

Browse files
sergey-semenov, bader
authored and committed
[SYCL] Add runtime support for split device binaries (#759)
This patch enables usage of multiple split device binaries per OS module (executable or shared object file). The required binary is chosen based on the entry table (filled by clang-offload-wrapper) that lists all kernels contained within. Signed-off-by: Sergey Semenov <[email protected]>
1 parent 1e4c915 commit 9095749

File tree

18 files changed

+568
-261
lines changed

18 files changed

+568
-261
lines changed

sycl/include/CL/sycl/detail/common.hpp

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -246,6 +246,15 @@ size_t getLinearIndex(const T<Dims> &Index, const U<Dims> &Range) {
246246
return LinearIndex;
247247
}
248248

249+
// Kernel set ID, used to group kernels (represented by OSModule & kernel name
250+
// pairs) into disjoint sets based on the kernel distribution among device
251+
// images.
252+
using KernelSetId = size_t;
253+
// Kernel set ID for kernels contained within the SPIRV file specified via
254+
// an environment variable.
255+
constexpr KernelSetId SpvFileKSId = 0;
256+
constexpr KernelSetId LastKSId = SpvFileKSId;
257+
249258
} // namespace detail
250259
} // namespace sycl
251260
} // namespace cl

sycl/include/CL/sycl/detail/context_impl.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@ class context_impl {
105105
/// Gets cached programs.
106106
///
107107
/// @return a map of cached programs.
108-
std::map<OSModuleHandle, RT::PiProgram> &getCachedPrograms() {
108+
std::map<KernelSetId, RT::PiProgram> &getCachedPrograms() {
109109
return MCachedPrograms;
110110
}
111111

@@ -128,7 +128,7 @@ class context_impl {
128128
platform MPlatform;
129129
bool MPluginInterop;
130130
bool MHostContext;
131-
std::map<OSModuleHandle, RT::PiProgram> MCachedPrograms;
131+
std::map<KernelSetId, RT::PiProgram> MCachedPrograms;
132132
std::map<RT::PiProgram, std::map<string_class, RT::PiKernel>> MCachedKernels;
133133
std::shared_ptr<usm::USMDispatcher> MUSMDispatch;
134134
};

sycl/include/CL/sycl/detail/pi.h

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -229,8 +229,16 @@ typedef _pi_sampler_addressing_mode pi_sampler_addressing_mode;
229229
typedef _pi_sampler_filter_mode pi_sampler_filter_mode;
230230
typedef _pi_sampler_info pi_sampler_info;
231231

232-
// Opaque data type for compatibility with OpenMP.
233-
typedef void * _pi_offload_entry;
232+
// Entry type, matches OpenMP for compatibility
233+
struct _pi_offload_entry_struct {
234+
void *addr;
235+
char *name;
236+
size_t size;
237+
int32_t flags;
238+
int32_t reserved;
239+
};
240+
241+
typedef _pi_offload_entry_struct * _pi_offload_entry;
234242

235243
/// Types of device binary.
236244
typedef uint8_t pi_device_binary_type;
@@ -302,7 +310,7 @@ struct pi_device_binary_struct {
302310
const unsigned char *BinaryStart;
303311
/// Pointer to the target code end
304312
const unsigned char *BinaryEnd;
305-
/// the offload entry table (not used, for compatibility with OpenMP)
313+
/// the offload entry table
306314
_pi_offload_entry EntriesBegin;
307315
_pi_offload_entry EntriesEnd;
308316
};

sycl/include/CL/sycl/detail/program_impl.hpp

Lines changed: 20 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -62,8 +62,7 @@ class program_impl {
6262
if (!is_host()) {
6363
DevicesSorted = sort_devices_by_cl_device_id(Devices);
6464
}
65-
check_device_feature_support<
66-
info::device::is_linker_available>(Devices);
65+
check_device_feature_support<info::device::is_linker_available>(Devices);
6766
for (const auto &Prg : ProgramList) {
6867
Prg->throw_if_state_is_not(program_state::compiled);
6968
if (Prg->Context != Context) {
@@ -183,10 +182,9 @@ class program_impl {
183182
template <typename KernelT>
184183
void compile_with_kernel_type(string_class CompileOptions = "") {
185184
throw_if_state_is_not(program_state::none);
186-
// TODO Check for existence of kernel
187185
if (!is_host()) {
188186
OSModuleHandle M = OSUtil::getOSModuleHandle(AddressInThisModule);
189-
create_cl_program_with_il(M);
187+
create_pi_program_with_kernel_name(M, KernelInfo<KernelT>::getName());
190188
compile(CompileOptions);
191189
}
192190
State = program_state::compiled;
@@ -206,17 +204,16 @@ class program_impl {
206204
template <typename KernelT>
207205
void build_with_kernel_type(string_class BuildOptions = "") {
208206
throw_if_state_is_not(program_state::none);
209-
// TODO Check for existence of kernel
210207
if (!is_host()) {
211208
OSModuleHandle M = OSUtil::getOSModuleHandle(AddressInThisModule);
212209
// If there are no build options, program can be safely cached
213210
if (is_cacheable_with_options(BuildOptions)) {
214211
IsProgramAndKernelCachingAllowed = true;
215-
Program =
216-
ProgramManager::getInstance().getBuiltOpenCLProgram(M, Context);
212+
Program = ProgramManager::getInstance().getBuiltPIProgram(
213+
M, Context, KernelInfo<KernelT>::getName());
217214
PI_CALL(piProgramRetain)(Program);
218215
} else {
219-
create_cl_program_with_il(M);
216+
create_pi_program_with_kernel_name(M, KernelInfo<KernelT>::getName());
220217
build(BuildOptions);
221218
}
222219
}
@@ -237,8 +234,7 @@ class program_impl {
237234
void link(string_class LinkOptions = "") {
238235
throw_if_state_is_not(program_state::compiled);
239236
if (!is_host()) {
240-
check_device_feature_support<
241-
info::device::is_linker_available>(Devices);
237+
check_device_feature_support<info::device::is_linker_available>(Devices);
242238
vector_class<RT::PiDevice> Devices(get_pi_devices());
243239
PI_CALL_THROW(piProgramLink, compile_program_error)(
244240
detail::getSyclObjImpl(Context)->getHandleRef(), Devices.size(),
@@ -339,8 +335,7 @@ class program_impl {
339335

340336
private:
341337
template <info::device param>
342-
void check_device_feature_support(
343-
const vector_class<device> &devices) {
338+
void check_device_feature_support(const vector_class<device> &devices) {
344339
for (const auto &device : devices) {
345340
if (!device.get_info<param>()) {
346341
throw feature_not_supported(
@@ -349,9 +344,12 @@ class program_impl {
349344
}
350345
}
351346

352-
void create_cl_program_with_il(OSModuleHandle M) {
347+
void create_pi_program_with_kernel_name(OSModuleHandle M,
348+
const string_class &KernelName) {
353349
assert(!Program && "This program already has an encapsulated PI program");
354-
Program = ProgramManager::getInstance().createOpenCLProgram(M, Context);
350+
ProgramManager &PM = ProgramManager::getInstance();
351+
DeviceImage &Img = PM.getDeviceImage(M, KernelName, Context);
352+
Program = PM.createPIProgram(Img, Context);
355353
}
356354

357355
void create_cl_program_with_source(const string_class &Source) {
@@ -364,8 +362,7 @@ class program_impl {
364362
}
365363

366364
void compile(const string_class &Options) {
367-
check_device_feature_support<
368-
info::device::is_compiler_available>(Devices);
365+
check_device_feature_support<info::device::is_compiler_available>(Devices);
369366
vector_class<RT::PiDevice> Devices(get_pi_devices());
370367
RT::PiResult Err = PI_CALL_NOCHECK(piProgramCompile)(
371368
Program, Devices.size(), Devices.data(), Options.c_str(), 0, nullptr,
@@ -380,8 +377,7 @@ class program_impl {
380377
}
381378

382379
void build(const string_class &Options) {
383-
check_device_feature_support<
384-
info::device::is_compiler_available>(Devices);
380+
check_device_feature_support<info::device::is_compiler_available>(Devices);
385381
vector_class<RT::PiDevice> Devices(get_pi_devices());
386382
RT::PiResult Err =
387383
PI_CALL_NOCHECK(piProgramBuild)(Program, Devices.size(), Devices.data(),
@@ -402,6 +398,12 @@ class program_impl {
402398
return PiDevices;
403399
}
404400

401+
bool is_cacheable() const { return IsProgramAndKernelCachingAllowed; }
402+
403+
static bool is_cacheable_with_options(const string_class &Options) {
404+
return Options.empty();
405+
}
406+
405407
bool has_cl_kernel(const string_class &KernelName) const {
406408
size_t Size;
407409
PI_CALL(piProgramGetInfo)(Program, CL_PROGRAM_KERNEL_NAMES, 0, nullptr,
@@ -420,15 +422,6 @@ class program_impl {
420422
return false;
421423
}
422424

423-
bool is_cacheable() const {
424-
return IsProgramAndKernelCachingAllowed;
425-
}
426-
427-
static bool
428-
is_cacheable_with_options(const string_class &Options) {
429-
return Options.empty();
430-
}
431-
432425
RT::PiKernel get_pi_kernel(const string_class &KernelName) const {
433426
RT::PiKernel Kernel;
434427

sycl/include/CL/sycl/detail/program_manager/program_manager.hpp

Lines changed: 54 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -48,11 +48,11 @@ class ProgramManager {
4848
// Returns the single instance of the program manager for the entire process.
4949
// Can only be called after staticInit is done.
5050
static ProgramManager &getInstance();
51-
RT::PiProgram createOpenCLProgram(OSModuleHandle M, const context &Context,
52-
DeviceImage **I = nullptr) {
53-
return loadProgram(M, Context, I);
54-
}
55-
RT::PiProgram getBuiltOpenCLProgram(OSModuleHandle M, const context &Context);
51+
DeviceImage &getDeviceImage(OSModuleHandle M, const string_class &KernelName,
52+
const context &Context);
53+
RT::PiProgram createPIProgram(const DeviceImage &Img, const context &Context);
54+
RT::PiProgram getBuiltPIProgram(OSModuleHandle M, const context &Context,
55+
const string_class &KernelName);
5656
RT::PiKernel getOrCreateKernel(OSModuleHandle M, const context &Context,
5757
const string_class &KernelName);
5858
RT::PiProgram getClProgramFromClKernel(RT::PiKernel Kernel);
@@ -63,26 +63,65 @@ class ProgramManager {
6363
static string_class getProgramBuildLog(const RT::PiProgram &Program);
6464

6565
private:
66-
RT::PiProgram loadProgram(OSModuleHandle M, const context &Context,
67-
DeviceImage **I = nullptr);
68-
void build(RT::PiProgram Program, const string_class &Options = "",
69-
std::vector<RT::PiDevice> Devices = std::vector<RT::PiDevice>());
70-
71-
ProgramManager() = default;
66+
ProgramManager();
7267
~ProgramManager() = default;
7368
ProgramManager(ProgramManager const &) = delete;
7469
ProgramManager &operator=(ProgramManager const &) = delete;
7570

71+
DeviceImage &getDeviceImage(OSModuleHandle M, KernelSetId KSId,
72+
const context &Context);
73+
void build(RT::PiProgram Program, const string_class &Options,
74+
std::vector<RT::PiDevice> Devices);
75+
/// Provides a new kernel set id for grouping kernel names together
76+
KernelSetId getNextKernelSetId() const;
77+
/// Returns the kernel set associated with the kernel, handles some special
78+
/// cases (when reading images from file or using images with no entry info)
79+
KernelSetId getKernelSetId(OSModuleHandle M,
80+
const string_class &KernelName) const;
81+
/// Returns the format of the binary image
82+
RT::PiDeviceBinaryType getFormat(const DeviceImage &Img) const;
83+
/// Dumps image to current directory
84+
void dumpImage(const DeviceImage &Img, KernelSetId KSId) const;
85+
86+
/// The three maps below are used during kernel resolution. Any kernel is
87+
/// identified by its name and the OS module it's coming from, allowing
88+
/// kernels with identical names in different OS modules. The following
89+
/// assumption is made: for any two device images in a SYCL application their
90+
/// kernel sets are either identical or disjoint.
91+
/// Based on this assumption, m_KernelSets is used to group kernels together
92+
/// into sets by assigning a set ID to them during device image registration.
93+
/// This ID is then mapped to a vector of device images containing kernels
94+
/// from the set (m_DeviceImages).
95+
/// An exception is made for device images with no entry information: a
96+
/// special kernel set ID is used for them which is assigned to just the OS
97+
/// module. These kernel set ids are stored in m_OSModuleKernelSets and device
98+
/// images associated with them are assumed to contain all kernels coming from
99+
/// that OS module.
100+
76101
/// Keeps all available device executable images added via \ref addImages.
77-
/// Organizes the images as a map from a module handle (.exe .dll) to the
78-
/// vector of images coming from the module.
102+
/// Organizes the images as a map from a kernel set id to the vector of images
103+
/// containing kernels from that set.
104+
/// Access must be guarded by the \ref Sync::getGlobalLock()
105+
std::map<KernelSetId, std::unique_ptr<std::vector<DeviceImage *>>> m_DeviceImages;
106+
107+
using StrToKSIdMap = std::map<string_class, KernelSetId>;
108+
/// Maps names of kernels from a specific OS module (.exe .dll) to their set
109+
/// id (the sets are disjoint).
110+
/// Access must be guarded by the \ref Sync::getGlobalLock()
111+
std::map<OSModuleHandle, StrToKSIdMap> m_KernelSets;
112+
113+
/// Keeps kernel sets for OS modules containing images without entry info.
114+
/// Such images are assumed to contain all kernels associated with the module.
79115
/// Access must be guarded by the \ref Sync::getGlobalLock()
80-
std::map<OSModuleHandle, std::unique_ptr<std::vector<DeviceImage *>>>
81-
m_DeviceImages;
116+
std::map<OSModuleHandle, KernelSetId> m_OSModuleKernelSets;
117+
82118
/// Keeps device images not bound to a particular module. Program manager
83119
/// allocated memory for these images, so they are auto-freed in destructor.
84120
/// No image can out-live the Program manager.
85121
std::vector<std::unique_ptr<DeviceImage, ImageDeleter>> m_OrphanDeviceImages;
122+
123+
/// True iff a SPIRV file has been specified with an environment variable
124+
bool m_UseSpvFile = false;
86125
};
87126
} // namespace detail
88127
} // namespace sycl

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -68,10 +68,10 @@ pi_result OCL(piDevicesGet)(pi_platform platform, pi_device_type device_type,
6868
return cast<pi_result>(result);
6969
}
7070

71-
pi_result OCL(piextDeviceSelectBinary)(
72-
pi_device device, // TODO: does this need to be context?
73-
pi_device_binary *images, pi_uint32 num_images,
74-
pi_device_binary *selected_image) {
71+
pi_result OCL(piextDeviceSelectBinary)(pi_device device,
72+
pi_device_binary *images,
73+
pi_uint32 num_images,
74+
pi_device_binary *selected_image) {
7575

7676
// TODO: this is a bare-bones implementation for choosing a device image
7777
// that would be compatible with the targeted device. An AOT-compiled

sycl/source/detail/context_impl.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -86,9 +86,8 @@ context_impl::~context_impl() {
8686
// Release all programs and kernels created with this context
8787
for (auto ProgIt : MCachedPrograms) {
8888
RT::PiProgram ToBeDeleted = ProgIt.second;
89-
for (auto KernIt : MCachedKernels[ToBeDeleted]) {
89+
for (auto KernIt : MCachedKernels[ToBeDeleted])
9090
PI_CALL(piKernelRelease)(KernIt.second);
91-
}
9291
PI_CALL(piProgramRelease)(ToBeDeleted);
9392
}
9493
}

sycl/source/detail/os_util.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,8 @@ struct ModuleInfo {
5656
const char *Name; // out
5757
};
5858

59+
constexpr OSModuleHandle OSUtil::ExeModuleHandle;
60+
5961
static int callback(struct dl_phdr_info *Info, size_t Size, void *Data) {
6062
auto Base = reinterpret_cast<unsigned char *>(Info->dlpi_addr);
6163
auto MI = reinterpret_cast<ModuleInfo *>(Data);

0 commit comments

Comments
 (0)