Skip to content

[SYCL] Use device code cache when building kernel bundles #4724

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 59 additions & 45 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,30 @@ static void applyOptionsFromEnvironment(std::string &CompileOpts,
}
}

std::pair<RT::PiProgram, bool> ProgramManager::getOrCreatePIProgram(
const RTDeviceBinaryImage &Img, const context &Context,
const device &Device, const std::string &CompileAndLinkOptions,
SerializedObj SpecConsts) {
RT::PiProgram NativePrg;

auto BinProg = PersistentDeviceCodeCache::getItemFromDisc(
Device, Img, SpecConsts, CompileAndLinkOptions);
if (BinProg.size()) {
// Get program metadata from properties
auto ProgMetadata = Img.getProgramMetadata();
std::vector<pi_device_binary_property> ProgMetadataVector{
ProgMetadata.begin(), ProgMetadata.end()};

// TODO: Build for multiple devices once supported by program manager
NativePrg = createBinaryProgram(getSyclObjImpl(Context), Device,
(const unsigned char *)BinProg[0].data(),
BinProg[0].size(), ProgMetadataVector);
} else {
NativePrg = createPIProgram(Img, Context, Device);
}
return {NativePrg, BinProg.size()};
}

RT::PiProgram ProgramManager::getBuiltPIProgram(
OSModuleHandle M, const ContextImplPtr &ContextImpl,
const DeviceImplPtr &DeviceImpl, const std::string &KernelName,
Expand Down Expand Up @@ -455,27 +479,14 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(
applyOptionsFromImage(CompileOpts, LinkOpts, Img);

const detail::plugin &Plugin = ContextImpl->getPlugin();
RT::PiProgram NativePrg;

// Get program metadata from properties
auto ProgMetadata = Img.getProgramMetadata();
std::vector<pi_device_binary_property> ProgMetadataVector{
ProgMetadata.begin(), ProgMetadata.end()};
auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
Img, Context, Device, CompileOpts + LinkOpts, SpecConsts);

auto BinProg = PersistentDeviceCodeCache::getItemFromDisc(
Device, Img, SpecConsts, CompileOpts + LinkOpts);
if (BinProg.size()) {
// TODO: Build for multiple devices once supported by program manager
NativePrg = createBinaryProgram(ContextImpl, Device,
(const unsigned char *)BinProg[0].data(),
BinProg[0].size(), ProgMetadataVector);
} else {
NativePrg = createPIProgram(Img, Context, Device);
if (!DeviceCodeWasInCache) {
if (Prg)
flushSpecConstants(*Prg, NativePrg, &Img);
if (Img.supportsSpecConstants())
enableITTAnnotationsIfNeeded(NativePrg,
getSyclObjImpl(Device)->getPlugin());
enableITTAnnotationsIfNeeded(NativePrg, Plugin);
}

ProgramPtr ProgramManaged(
Expand All @@ -488,7 +499,8 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(
// If device image is not SPIR-V, DeviceLibReqMask will be 0 which means
// no fallback device library will be linked.
uint32_t DeviceLibReqMask = 0;
if (!BinProg.size() && Img.getFormat() == PI_DEVICE_BINARY_TYPE_SPIRV &&
if (!DeviceCodeWasInCache &&
Img.getFormat() == PI_DEVICE_BINARY_TYPE_SPIRV &&
!SYCLConfig<SYCL_DEVICELIB_NO_FALLBACK>::get())
DeviceLibReqMask = getDeviceLibReqMask(Img);

Expand All @@ -503,7 +515,7 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(
}

// Save program to persistent cache if it is not there
if (!BinProg.size())
if (!DeviceCodeWasInCache)
PersistentDeviceCodeCache::putItemToDisc(
Device, Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get());
return BuiltProgram.release();
Expand Down Expand Up @@ -1549,9 +1561,8 @@ ProgramManager::compile(const device_image_plain &DeviceImage,
RT::PiProgram Prog = createPIProgram(*InputImpl->get_bin_image_ref(),
InputImpl->get_context(), Devs[0]);

for (const device &Dev : Devs)
if (InputImpl->get_bin_image_ref()->supportsSpecConstants())
enableITTAnnotationsIfNeeded(Prog, getSyclObjImpl(Dev)->getPlugin());
if (InputImpl->get_bin_image_ref()->supportsSpecConstants())
enableITTAnnotationsIfNeeded(Prog, Plugin);

DeviceImageImplPtr ObjectImpl = std::make_shared<detail::device_image_impl>(
InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs,
Expand Down Expand Up @@ -1674,9 +1685,11 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage,
const RTDeviceBinaryImage *ImgPtr = InputImpl->get_bin_image_ref();
const RTDeviceBinaryImage &Img = *ImgPtr;

SerializedObj SpecConsts = InputImpl->get_spec_const_blob_ref();

// TODO: Unify this code with getBuiltPIProgram
auto BuildF = [this, &Context, Img, &Devs, &CompileOpts, &LinkOpts,
&InputImpl] {
&InputImpl, SpecConsts] {
applyOptionsFromImage(CompileOpts, LinkOpts, Img);
ContextImplPtr ContextImpl = getSyclObjImpl(Context);
const detail::plugin &Plugin = ContextImpl->getPlugin();
Expand All @@ -1692,29 +1705,27 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage,

// Device is not used when creating program from SPIRV, so passing only one
// device is OK.
RT::PiProgram NativePrg = createPIProgram(Img, Context, Devs[0]);
auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram(
Img, Context, Devs[0], CompileOpts + LinkOpts, SpecConsts);

for (const device &Dev : Devs)
if (!DeviceCodeWasInCache) {
if (InputImpl->get_bin_image_ref()->supportsSpecConstants())
enableITTAnnotationsIfNeeded(NativePrg,
getSyclObjImpl(Dev)->getPlugin());

const std::vector<unsigned char> &SpecConstsBlob =
InputImpl->get_spec_const_blob_ref();
enableITTAnnotationsIfNeeded(NativePrg, Plugin);

{
std::lock_guard<std::mutex> Lock{InputImpl->get_spec_const_data_lock()};
const std::map<std::string,
std::vector<device_image_impl::SpecConstDescT>>
&SpecConstData = InputImpl->get_spec_const_data_ref();

for (const auto &DescPair : SpecConstData) {
for (const device_image_impl::SpecConstDescT &SpecIDDesc :
DescPair.second) {
if (SpecIDDesc.IsSet) {
Plugin.call<PiApiKind::piextProgramSetSpecializationConstant>(
NativePrg, SpecIDDesc.ID, SpecIDDesc.Size,
SpecConstsBlob.data() + SpecIDDesc.BlobOffset);
{
std::lock_guard<std::mutex> Lock{InputImpl->get_spec_const_data_lock()};
const std::map<std::string,
std::vector<device_image_impl::SpecConstDescT>>
&SpecConstData = InputImpl->get_spec_const_data_ref();

for (const auto &DescPair : SpecConstData) {
for (const device_image_impl::SpecConstDescT &SpecIDDesc :
DescPair.second) {
if (SpecIDDesc.IsSet) {
Plugin.call<PiApiKind::piextProgramSetSpecializationConstant>(
NativePrg, SpecIDDesc.ID, SpecIDDesc.Size,
SpecConsts.data() + SpecIDDesc.BlobOffset);
}
}
}
}
Expand Down Expand Up @@ -1743,11 +1754,14 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage,
NativePrograms[BuiltProgram.get()] = &Img;
}

// Save program to persistent cache if it is not there
if (!DeviceCodeWasInCache)
PersistentDeviceCodeCache::putItemToDisc(
Devs[0], Img, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get());

return BuiltProgram.release();
};

SerializedObj SpecConsts = InputImpl->get_spec_const_blob_ref();

const RT::PiDevice PiDevice = getRawSyclObjImpl(Devs[0])->getHandleRef();
// TODO: Throw SYCL2020 style exception
auto BuildResult = getOrBuild<PiProgramT, compile_program_error>(
Expand Down
23 changes: 23 additions & 0 deletions sycl/source/detail/program_manager/program_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,29 @@ class ProgramManager {
bool JITCompilationIsRequired = false);
RT::PiProgram createPIProgram(const RTDeviceBinaryImage &Img,
const context &Context, const device &Device);
/// Creates a PI program either from a device code binary found in the
/// persistent cache or, when the cache has no matching entry, from the
/// supplied device image.
///
/// When a cached entry is found, only its first binary is used to create the
/// program (building for multiple devices is not supported yet) and the
/// program metadata of \p Img is passed along with it.
///
/// \param Img The device image to find a cached device code binary for or
///        create the PI program with.
/// \param Context The context to find or create the PI program with.
/// \param Device The device to find or create the PI program for.
/// \param CompileAndLinkOptions The compile and linking options to be used
///        for building the PI program. These options must appear in the
///        mentioned order (compile options first, then link options). This
///        parameter is used as a partial key in the cache and has no effect
///        if no cached device code binary is found in the persistent cache.
/// \param SpecConsts Specialization constants associated with the device
///        image. This parameter is used as a partial key in the cache and
///        has no effect if no cached device code binary is found in the
///        persistent cache.
/// \return A pair consisting of the PI program created with the corresponding
///         device code binary and a boolean that is true if the device code
///         binary was found in the persistent cache and false otherwise.
std::pair<RT::PiProgram, bool>
getOrCreatePIProgram(const RTDeviceBinaryImage &Img, const context &Context,
const device &Device,
const std::string &CompileAndLinkOptions,
SerializedObj SpecConsts);
/// Builds or retrieves from cache a program defining the kernel with given
/// name.
/// \param M identifies the OS module the kernel comes from (multiple OS modules
Expand Down