Skip to content

[SYCL] Separate host instantiation from HostKernel #18534

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 55 additions & 32 deletions sycl/include/sycl/detail/cg_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,32 +124,25 @@ struct KernelLambdaHasKernelHandlerArgT {

// Helpers for running kernel lambda on the host device

template <typename KernelType>
typename std::enable_if_t<KernelLambdaHasKernelHandlerArgT<KernelType>::value>
runKernelWithoutArg(KernelType KernelName) {
kernel_handler KH;
KernelName(KH);
}

template <typename KernelType>
typename std::enable_if_t<!KernelLambdaHasKernelHandlerArgT<KernelType>::value>
runKernelWithoutArg(KernelType KernelName) {
KernelName();
}

template <typename ArgType, typename KernelType>
typename std::enable_if_t<
KernelLambdaHasKernelHandlerArgT<KernelType, ArgType>::value>
runKernelWithArg(KernelType KernelName, ArgType Arg) {
kernel_handler KH;
KernelName(Arg, KH);
template <typename KernelType, bool HasKernelHandlerArg>
void runKernelWithoutArg(KernelType KernelName,
const std::bool_constant<HasKernelHandlerArg> &) {
if constexpr (HasKernelHandlerArg) {
kernel_handler KH;
KernelName(KH);
} else {
KernelName();
}
}

template <typename ArgType, typename KernelType>
typename std::enable_if_t<
!KernelLambdaHasKernelHandlerArgT<KernelType, ArgType>::value>
runKernelWithArg(KernelType KernelName, ArgType Arg) {
KernelName(Arg);
template <typename ArgType, typename KernelType, bool HasKernelHandlerArg>
void runKernelWithArg(KernelType KernelName, ArgType Arg,
const std::bool_constant<HasKernelHandlerArg> &) {
if constexpr (HasKernelHandlerArg) {
kernel_handler KH;
KernelName(Arg, KH);
} else {
KernelName(Arg);
}
}

// The pure virtual class aimed to store lambda/functors of any type.
Expand All @@ -159,8 +152,10 @@ class HostKernelBase {
// Used to extract captured variables.
virtual char *getPtr() = 0;
virtual ~HostKernelBase() = default;
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
// NOTE: InstatiateKernelOnHost() should not be called.
virtual void InstantiateKernelOnHost() = 0;
#endif
};

// Class which stores specific lambda object.
Expand All @@ -176,17 +171,21 @@ class HostKernel : public HostKernelBase {

~HostKernel() = default;

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
// This function is needed for host-side compilation to keep kernels
// instantitated. This is important for debuggers to be able to associate
// kernel code instructions with source code lines.
// NOTE: InstatiateKernelOnHost() should not be called.
void InstantiateKernelOnHost() override {
using IDBuilder = sycl::detail::Builder;
constexpr bool HasKernelHandlerArg =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can't we just call "new" instantiation here?

KernelLambdaHasKernelHandlerArgT<KernelType, KernelArgType>::value;
if constexpr (std::is_same_v<KernelArgType, void>) {
runKernelWithoutArg(MKernel);
runKernelWithoutArg(MKernel, std::bool_constant<HasKernelHandlerArg>());
} else if constexpr (std::is_same_v<KernelArgType, sycl::id<Dims>>) {
sycl::id ID = InitializedVal<Dims, id>::template get<0>();
runKernelWithArg<const KernelArgType &>(MKernel, ID);
runKernelWithArg<const KernelArgType &>(
MKernel, ID, std::bool_constant<HasKernelHandlerArg>());
} else if constexpr (std::is_same_v<KernelArgType, item<Dims, true>> ||
std::is_same_v<KernelArgType, item<Dims, false>>) {
constexpr bool HasOffset =
Expand All @@ -195,13 +194,15 @@ class HostKernel : public HostKernelBase {
KernelArgType Item = IDBuilder::createItem<Dims, HasOffset>(
InitializedVal<Dims, range>::template get<1>(),
InitializedVal<Dims, id>::template get<0>());
runKernelWithArg<KernelArgType>(MKernel, Item);
runKernelWithArg<KernelArgType>(
MKernel, Item, std::bool_constant<HasKernelHandlerArg>());
} else {
KernelArgType Item = IDBuilder::createItem<Dims, HasOffset>(
InitializedVal<Dims, range>::template get<1>(),
InitializedVal<Dims, id>::template get<0>(),
InitializedVal<Dims, id>::template get<0>());
runKernelWithArg<KernelArgType>(MKernel, Item);
runKernelWithArg<KernelArgType>(
MKernel, Item, std::bool_constant<HasKernelHandlerArg>());
}
} else if constexpr (std::is_same_v<KernelArgType, nd_item<Dims>>) {
sycl::range<Dims> Range = InitializedVal<Dims, range>::template get<1>();
Expand All @@ -214,22 +215,44 @@ class HostKernel : public HostKernelBase {
IDBuilder::createItem<Dims, false>(Range, ID);
KernelArgType NDItem =
IDBuilder::createNDItem<Dims>(GlobalItem, LocalItem, Group);
runKernelWithArg<const KernelArgType>(MKernel, NDItem);
runKernelWithArg<const KernelArgType>(
MKernel, NDItem, std::bool_constant<HasKernelHandlerArg>());
} else if constexpr (std::is_same_v<KernelArgType, sycl::group<Dims>>) {
sycl::range<Dims> Range = InitializedVal<Dims, range>::template get<1>();
sycl::id<Dims> ID = InitializedVal<Dims, id>::template get<0>();
KernelArgType Group =
IDBuilder::createGroup<Dims>(Range, Range, Range, ID);
runKernelWithArg<KernelArgType>(MKernel, Group);
runKernelWithArg<KernelArgType>(
MKernel, Group, std::bool_constant<HasKernelHandlerArg>());
} else {
// Assume that anything else can be default-constructed. If not, this
// should fail to compile and the implementor should implement a generic
// case for the new argument type.
runKernelWithArg<KernelArgType>(MKernel, KernelArgType{});
runKernelWithArg<KernelArgType>(
MKernel, KernelArgType{}, std::bool_constant<HasKernelHandlerArg>());
}
}
#endif
};

// This function is needed for host-side compilation to keep kernels
// instantitated. This is important for debuggers to be able to associate
// kernel code instructions with source code lines.
template <class KernelType, class KernelArgType, int Dims>
constexpr void *GetInstantiateKernelOnHostPtr() {
if constexpr (std::is_same_v<KernelArgType, void>) {
constexpr bool HasKernelHandlerArg =
KernelLambdaHasKernelHandlerArgT<KernelType>::value;
return reinterpret_cast<void *>(
&runKernelWithoutArg<KernelType, HasKernelHandlerArg>);
} else {
constexpr bool HasKernelHandlerArg =
KernelLambdaHasKernelHandlerArgT<KernelType, KernelArgType>::value;
return reinterpret_cast<void *>(
&runKernelWithArg<KernelArgType, KernelType, HasKernelHandlerArg>);
}
}

} // namespace detail
} // namespace _V1
} // namespace sycl
10 changes: 10 additions & 0 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,14 @@ class __SYCL_EXPORT handler {
MHostKernel.reset(new detail::HostKernel<KernelType, LambdaArgType, Dims>(
std::forward<KernelTypeUniversalRef>(KernelFunc)));

// Instantiating the kernel on the host improves debugging.
// Passing this pointer to another translation unit prevents optimization.
#ifndef NDEBUG
instantiateKernelOnHost(
detail::GetInstantiateKernelOnHostPtr<KernelType, LambdaArgType,
Dims>());
#endif

constexpr bool KernelHasName =
detail::getKernelName<KernelName>() != nullptr &&
detail::getKernelName<KernelName>()[0] != '\0';
Expand Down Expand Up @@ -3770,6 +3778,8 @@ class __SYCL_EXPORT handler {
detail::kernel_param_desc_t (*KernelParamDescGetter)(int),
bool KernelIsESIMD, bool KernelHasSpecialCaptures);

void instantiateKernelOnHost(void *InstantiateKernelOnHostPtr);

friend class detail::HandlerAccess;

#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
Expand Down
6 changes: 6 additions & 0 deletions sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2285,6 +2285,12 @@ void handler::setKernelInfo(
impl->MKernelHasSpecialCaptures = KernelHasSpecialCaptures;
}

void handler::instantiateKernelOnHost(void *InstantiateKernelOnHostPtr) {
// Passing the pointer to the runtime is enough to prevent optimization.
// We don't need to use the pointer for anything.
(void)InstantiateKernelOnHostPtr;
}

void handler::saveCodeLoc(detail::code_location CodeLoc, bool IsTopCodeLoc) {
MCodeLoc = CodeLoc;
impl->MIsTopCodeLoc = IsTopCodeLoc;
Expand Down
1 change: 1 addition & 0 deletions sycl/test/abi/sycl_symbols_linux.dump
Original file line number Diff line number Diff line change
Expand Up @@ -3566,6 +3566,7 @@ _ZN4sycl3_V17handler22setHandlerKernelBundleENS0_6kernelE
_ZN4sycl3_V17handler22setHandlerKernelBundleERKSt10shared_ptrINS0_6detail18kernel_bundle_implEE
_ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi3EEEi
_ZN4sycl3_V17handler22setKernelIsCooperativeEb
_ZN4sycl3_V17handler23instantiateKernelOnHostEPv
_ZN4sycl3_V17handler24GetRangeRoundingSettingsERmS2_S2_
_ZN4sycl3_V17handler24ext_intel_read_host_pipeENS0_6detail11string_viewEPvmb
_ZN4sycl3_V17handler24ext_oneapi_memcpy2d_implEPvmPKvmmm
Expand Down
1 change: 1 addition & 0 deletions sycl/test/abi/sycl_symbols_windows.dump
Original file line number Diff line number Diff line change
Expand Up @@ -4229,6 +4229,7 @@
?has_kernel_bundle_impl@detail@_V1@sycl@@YA_NAEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@W4bundle_state@23@@Z
?has_specialization_constant_impl@kernel_bundle_plain@detail@_V1@sycl@@IEBA_NPEBD@Z
?increase_threshold_to@memory_pool@experimental@oneapi@ext@_V1@sycl@@QEAAX_K@Z
?instantiateKernelOnHost@handler@_V1@sycl@@AEAAXPEAX@Z
?internalProfilingTagImpl@handler@_V1@sycl@@AEAAXXZ
?isBackendSupportedFillSize@handler@_V1@sycl@@CA_N_K@Z
?isConstOrGlobal@handler@_V1@sycl@@CA_NW4target@access@23@@Z
Expand Down