Skip to content

Commit 5746906

Browse files
[SYCL] Filter implicit kernel bundle images (#5285)
The runtime currently uses implicit kernel bundles to allow for operations such as setting specialization constants. Since the handler does not know the kernel to launch before it is being launched, the implicit kernel bundle created is not limited to specific kernels. As an effect of this, the kernel bundle may try to build more device images than it needs. These changes make the runtime filter the device images of implicit kernel bundles as soon as the relevant kernel is known. Signed-off-by: Steffen Larsen <[email protected]>
1 parent 2bd8689 commit 5746906

File tree

7 files changed

+94
-24
lines changed

7 files changed

+94
-24
lines changed

sycl/include/CL/sycl/kernel_bundle.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -588,8 +588,13 @@ template <typename KernelName> bool is_compatible(const device &Dev) {
588588

589589
namespace detail {
590590

591+
// TODO: This is no longer in use. Remove when ABI break is allowed.
591592
__SYCL_EXPORT std::shared_ptr<detail::kernel_bundle_impl>
592593
join_impl(const std::vector<detail::KernelBundleImplPtr> &Bundles);
594+
595+
__SYCL_EXPORT std::shared_ptr<detail::kernel_bundle_impl>
596+
join_impl(const std::vector<detail::KernelBundleImplPtr> &Bundles,
597+
bundle_state State);
593598
}
594599

595600
/// \returns a new kernel bundle that represents the union of all the device
@@ -604,7 +609,7 @@ join(const std::vector<sycl::kernel_bundle<State>> &Bundles) {
604609
KernelBundleImpls.push_back(detail::getSyclObjImpl(Bundle));
605610

606611
std::shared_ptr<detail::kernel_bundle_impl> Impl =
607-
detail::join_impl(KernelBundleImpls);
612+
detail::join_impl(KernelBundleImpls, State);
608613
return detail::createSyclObjFromImpl<kernel_bundle<State>>(Impl);
609614
}
610615

sycl/source/detail/kernel_bundle_impl.hpp

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ class kernel_bundle_impl {
7979

8080
public:
8181
kernel_bundle_impl(context Ctx, std::vector<device> Devs, bundle_state State)
82-
: MContext(std::move(Ctx)), MDevices(std::move(Devs)) {
82+
: MContext(std::move(Ctx)), MDevices(std::move(Devs)), MState(State) {
8383

8484
common_ctor_checks(State);
8585

@@ -89,7 +89,7 @@ class kernel_bundle_impl {
8989

9090
// Interop constructor used by make_kernel
9191
kernel_bundle_impl(context Ctx, std::vector<device> Devs)
92-
: MContext(Ctx), MDevices(Devs) {
92+
: MContext(Ctx), MDevices(Devs), MState(bundle_state::executable) {
9393
if (!checkAllDevicesAreInContext(Devs, Ctx))
9494
throw sycl::exception(
9595
make_error_code(errc::invalid),
@@ -111,7 +111,8 @@ class kernel_bundle_impl {
111111
kernel_bundle_impl(const kernel_bundle<bundle_state::input> &InputBundle,
112112
std::vector<device> Devs, const property_list &PropList,
113113
bundle_state TargetState)
114-
: MContext(InputBundle.get_context()), MDevices(std::move(Devs)) {
114+
: MContext(InputBundle.get_context()), MDevices(std::move(Devs)),
115+
MState(TargetState) {
115116

116117
MSpecConstValues = getSyclObjImpl(InputBundle)->get_spec_const_map_ref();
117118

@@ -161,7 +162,7 @@ class kernel_bundle_impl {
161162
kernel_bundle_impl(
162163
const std::vector<kernel_bundle<bundle_state::object>> &ObjectBundles,
163164
std::vector<device> Devs, const property_list &PropList)
164-
: MDevices(std::move(Devs)) {
165+
: MDevices(std::move(Devs)), MState(bundle_state::executable) {
165166

166167
if (MDevices.empty())
167168
throw sycl::exception(make_error_code(errc::invalid),
@@ -241,7 +242,7 @@ class kernel_bundle_impl {
241242
kernel_bundle_impl(context Ctx, std::vector<device> Devs,
242243
const std::vector<kernel_id> &KernelIDs,
243244
bundle_state State)
244-
: MContext(std::move(Ctx)), MDevices(std::move(Devs)) {
245+
: MContext(std::move(Ctx)), MDevices(std::move(Devs)), MState(State) {
245246

246247
// TODO: Add a check that all kernel ids are compatible with at least one
247248
// device in Devs
@@ -253,7 +254,7 @@ class kernel_bundle_impl {
253254

254255
kernel_bundle_impl(context Ctx, std::vector<device> Devs,
255256
const DevImgSelectorImpl &Selector, bundle_state State)
256-
: MContext(std::move(Ctx)), MDevices(std::move(Devs)) {
257+
: MContext(std::move(Ctx)), MDevices(std::move(Devs)), MState(State) {
257258

258259
common_ctor_checks(State);
259260

@@ -262,7 +263,9 @@ class kernel_bundle_impl {
262263
}
263264

264265
// C'tor matches sycl::join API
265-
kernel_bundle_impl(const std::vector<detail::KernelBundleImplPtr> &Bundles) {
266+
kernel_bundle_impl(const std::vector<detail::KernelBundleImplPtr> &Bundles,
267+
bundle_state State)
268+
: MState(State) {
266269
if (Bundles.empty())
267270
return;
268271

@@ -480,22 +483,41 @@ class kernel_bundle_impl {
480483

481484
size_t size() const noexcept { return MDeviceImages.size(); }
482485

483-
bundle_state get_bundle_state() const {
484-
// Interop kernel-bundles are always in executable state
485-
if (MIsInterop)
486-
return bundle_state::executable;
487-
// All device images are expected to have the same state
488-
return MDeviceImages.empty()
489-
? bundle_state::input
490-
: detail::getSyclObjImpl(MDeviceImages[0])->get_state();
491-
}
486+
bundle_state get_bundle_state() const { return MState; }
492487

493488
const SpecConstMapT &get_spec_const_map_ref() const noexcept {
494489
return MSpecConstValues;
495490
}
496491

497492
bool isInterop() const { return MIsInterop; }
498493

494+
bool add_kernel(const kernel_id &KernelID, const device &Dev) {
495+
// Skip if kernel is already there
496+
if (has_kernel(KernelID, Dev))
497+
return true;
498+
499+
// First try and get images in current bundle state
500+
const bundle_state BundleState = get_bundle_state();
501+
std::vector<device_image_plain> NewDevImgs =
502+
detail::ProgramManager::getInstance().getSYCLDeviceImages(
503+
MContext, {Dev}, {KernelID}, BundleState);
504+
505+
// No images found so we report as not inserted
506+
if (NewDevImgs.empty())
507+
return false;
508+
509+
// Propagate already set specialization constants to the new images
510+
for (device_image_plain &DevImg : NewDevImgs)
511+
for (auto SpecConst : MSpecConstValues)
512+
getSyclObjImpl(DevImg)->set_specialization_constant_raw_value(
513+
SpecConst.first.c_str(), SpecConst.second.data());
514+
515+
// Add the images to the collection
516+
MDeviceImages.insert(MDeviceImages.end(), NewDevImgs.begin(),
517+
NewDevImgs.end());
518+
return true;
519+
}
520+
499521
private:
500522
context MContext;
501523
std::vector<device> MDevices;
@@ -504,6 +526,7 @@ class kernel_bundle_impl {
504526
// from any device image.
505527
SpecConstMapT MSpecConstValues;
506528
bool MIsInterop = false;
529+
bundle_state MState;
507530
};
508531

509532
} // namespace detail

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1536,6 +1536,10 @@ std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
15361536
std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
15371537
const context &Ctx, const std::vector<device> &Devs,
15381538
const std::vector<kernel_id> &KernelIDs, bundle_state TargetState) {
1539+
// Fast path for when no kernel IDs are requested
1540+
if (KernelIDs.empty())
1541+
return {};
1542+
15391543
{
15401544
std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);
15411545

sycl/source/handler.cpp

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -113,12 +113,10 @@ handler::getOrInsertHandlerKernelBundle(bool Insert) const {
113113

114114
// No kernel bundle yet, create one
115115
if (!KernelBundleImpPtr && Insert) {
116-
KernelBundleImpPtr = detail::getSyclObjImpl(
117-
get_kernel_bundle<bundle_state::input>(MQueue->get_context()));
118-
if (KernelBundleImpPtr->empty()) {
119-
KernelBundleImpPtr = detail::getSyclObjImpl(
120-
get_kernel_bundle<bundle_state::executable>(MQueue->get_context()));
121-
}
116+
// Create an empty kernel bundle to add kernels to later
117+
KernelBundleImpPtr =
118+
detail::getSyclObjImpl(get_kernel_bundle<bundle_state::input>(
119+
MQueue->get_context(), {MQueue->get_device()}, {}));
122120

123121
detail::ExtendedMemberT EMember = {
124122
detail::ExtendedMembersType::HANDLER_KERNEL_BUNDLE, KernelBundleImpPtr};
@@ -169,6 +167,33 @@ event handler::finalize() {
169167
// If there were uses of set_specialization_constant build the kernel_bundle
170168
KernelBundleImpPtr = getOrInsertHandlerKernelBundle(/*Insert=*/false);
171169
if (KernelBundleImpPtr) {
170+
// Make sure implicit non-interop kernel bundles have the kernel
171+
if (!KernelBundleImpPtr->isInterop() &&
172+
!getHandlerImpl()->isStateExplicitKernelBundle()) {
173+
kernel_id KernelID =
174+
detail::ProgramManager::getInstance().getSYCLKernelID(MKernelName);
175+
bool KernelInserted =
176+
KernelBundleImpPtr->add_kernel(KernelID, MQueue->get_device());
177+
// If kernel was not inserted and the bundle is in input mode we try
178+
// building it and trying to find the kernel in executable mode
179+
if (!KernelInserted &&
180+
KernelBundleImpPtr->get_bundle_state() == bundle_state::input) {
181+
auto KernelBundle =
182+
detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
183+
KernelBundleImpPtr);
184+
kernel_bundle<bundle_state::executable> ExecKernelBundle =
185+
build(KernelBundle);
186+
KernelBundleImpPtr = detail::getSyclObjImpl(ExecKernelBundle);
187+
setHandlerKernelBundle(KernelBundleImpPtr);
188+
KernelInserted =
189+
KernelBundleImpPtr->add_kernel(KernelID, MQueue->get_device());
190+
}
191+
// If the kernel was not found in executable mode we throw an exception
192+
if (!KernelInserted)
193+
throw sycl::exception(make_error_code(errc::runtime),
194+
"Failed to add kernel to kernel bundle.");
195+
}
196+
172197
switch (KernelBundleImpPtr->get_bundle_state()) {
173198
case bundle_state::input: {
174199
// Underlying level expects kernel_bundle to be in executable state
@@ -618,6 +643,10 @@ void handler::verifyUsedKernelBundle(const std::string &KernelName) {
618643
if (!UsedKernelBundleImplPtr)
619644
return;
620645

646+
// Implicit kernel bundles are populated late so we ignore them
647+
if (!getHandlerImpl()->isStateExplicitKernelBundle())
648+
return;
649+
621650
kernel_id KernelID = detail::get_kernel_id_impl(KernelName);
622651
device Dev = detail::getDeviceFromHandler(*this);
623652
if (!UsedKernelBundleImplPtr->has_kernel(KernelID, Dev))

sycl/source/kernel_bundle.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,14 @@ get_empty_interop_kernel_bundle_impl(const context &Ctx,
146146

147147
std::shared_ptr<detail::kernel_bundle_impl>
148148
join_impl(const std::vector<detail::KernelBundleImplPtr> &Bundles) {
149-
return std::make_shared<detail::kernel_bundle_impl>(Bundles);
149+
return std::make_shared<detail::kernel_bundle_impl>(Bundles,
150+
bundle_state::input);
151+
}
152+
153+
std::shared_ptr<detail::kernel_bundle_impl>
154+
join_impl(const std::vector<detail::KernelBundleImplPtr> &Bundles,
155+
bundle_state State) {
156+
return std::make_shared<detail::kernel_bundle_impl>(Bundles, State);
150157
}
151158

152159
bool has_kernel_bundle_impl(const context &Ctx, const std::vector<device> &Devs,

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3905,6 +3905,7 @@ _ZN2cl4sycl6detail6OSUtil16getCurrentDSODirB5cxx11Ev
39053905
_ZN2cl4sycl6detail6OSUtil17getOSModuleHandleEPKv
39063906
_ZN2cl4sycl6detail6OSUtil7makeDirEPKc
39073907
_ZN2cl4sycl6detail9join_implERKSt6vectorISt10shared_ptrINS1_18kernel_bundle_implEESaIS5_EE
3908+
_ZN2cl4sycl6detail9join_implERKSt6vectorISt10shared_ptrINS1_18kernel_bundle_implEESaIS5_EENS0_12bundle_stateE
39083909
_ZN2cl4sycl6detail9link_implERKSt6vectorINS0_13kernel_bundleILNS0_12bundle_stateE1EEESaIS5_EERKS2_INS0_6deviceESaISA_EERKNS0_13property_listE
39093910
_ZN2cl4sycl6device11get_devicesENS0_4info11device_typeE
39103911
_ZN2cl4sycl6deviceC1EP13_cl_device_id

sycl/test/abi/sycl_symbols_windows.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2432,6 +2432,7 @@
24322432
?is_in_order@queue@sycl@cl@@QEBA_NXZ
24332433
?is_specialization_constant_set@kernel_bundle_plain@detail@sycl@cl@@IEBA_NPEBD@Z
24342434
?join_impl@detail@sycl@cl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@sycl@cl@@@std@@AEBV?$vector@V?$shared_ptr@Vkernel_bundle_impl@detail@sycl@cl@@@std@@V?$allocator@V?$shared_ptr@Vkernel_bundle_impl@detail@sycl@cl@@@std@@@2@@5@@Z
2435+
?join_impl@detail@sycl@cl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@sycl@cl@@@std@@AEBV?$vector@V?$shared_ptr@Vkernel_bundle_impl@detail@sycl@cl@@@std@@V?$allocator@V?$shared_ptr@Vkernel_bundle_impl@detail@sycl@cl@@@std@@@2@@5@W4bundle_state@23@@Z
24352436
?ldexp@__host_std@cl@@YA?AV?$vec@M$00@sycl@2@V342@V?$vec@H$00@42@@Z
24362437
?ldexp@__host_std@cl@@YA?AV?$vec@M$01@sycl@2@V342@V?$vec@H$01@42@@Z
24372438
?ldexp@__host_std@cl@@YA?AV?$vec@M$02@sycl@2@V342@V?$vec@H$02@42@@Z

0 commit comments

Comments
 (0)