Skip to content

Commit 216137b

Browse files
authored
[SYCL][Fusion] Add kernel fusion extension API (#7416)
This is the first patch in a series of patches to add an implementation of the [kernel fusion extension](#7098). We have split the implementation into multiple patches to make them more easy to review. This first patch introduces the user-facing API discussed in the [extension proposal](#7098). It does not yet add any fusion functionality, just the mere API and SYCL properties. Calls to the API will throw an error until the remaining functionality lands with the following patches. The design document for the overall implementation of kernel fusion can be found [here](#7204). Signed-off-by: Lukas Sommer <[email protected]>
1 parent 5567fe5 commit 216137b

File tree

15 files changed

+386
-1
lines changed

15 files changed

+386
-1
lines changed

sycl/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@ install(DIRECTORY ${OpenCL_INCLUDE_DIR}/CL
144144
DESTINATION ${SYCL_INCLUDE_DIR}/sycl
145145
COMPONENT OpenCL-Headers)
146146

147+
# Option to enable online kernel fusion via a JIT compiler
148+
option(SYCL_ENABLE_KERNEL_FUSION "Enable kernel fusion via JIT compiler" OFF)
149+
147150
# Needed for feature_test.hpp
148151
if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS)
149152
set(SYCL_BUILD_PI_CUDA ON)

sycl/include/sycl/detail/property_helper.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,13 @@ enum DataLessPropKind {
3535
UseDefaultStream = 8,
3636
DiscardEvents = 9,
3737
DeviceReadOnly = 10,
38+
FusionPromotePrivate = 11,
39+
FusionPromoteLocal = 12,
40+
FusionNoBarrier = 13,
41+
FusionEnable = 14,
42+
FusionForce = 15,
3843
// Indicates the last known dataless property.
39-
LastKnownDataLessPropKind = 10,
44+
LastKnownDataLessPropKind = 15,
4045
// Exceeding 32 may cause ABI breaking change on some of OSes.
4146
DataLessPropKindSize = 32
4247
};
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
//==----------- fusion_properties.hpp --- SYCL fusion properties -----------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <sycl/access/access.hpp>
12+
#include <sycl/detail/property_helper.hpp>
13+
#include <sycl/properties/property_traits.hpp>
14+
15+
namespace sycl {
16+
__SYCL_INLINE_VER_NAMESPACE(_V1) {
17+
namespace ext {
18+
namespace codeplay {
19+
namespace experimental {
20+
namespace property {
21+
22+
class promote_private
23+
: public detail::DataLessProperty<detail::FusionPromotePrivate> {};
24+
25+
class promote_local
26+
: public detail::DataLessProperty<detail::FusionPromoteLocal> {};
27+
28+
class no_barriers : public detail::DataLessProperty<detail::FusionNoBarrier> {};
29+
30+
class force_fusion : public detail::DataLessProperty<detail::FusionForce> {};
31+
32+
namespace queue {
33+
class enable_fusion : public detail::DataLessProperty<detail::FusionEnable> {};
34+
} // namespace queue
35+
36+
} // namespace property
37+
} // namespace experimental
38+
} // namespace codeplay
39+
} // namespace ext
40+
41+
// Forward declarations
42+
template <typename T, int Dimensions, typename AllocatorT, typename Enable>
43+
class buffer;
44+
45+
template <typename DataT, int Dimensions, access::mode AccessMode,
46+
access::target AccessTarget, access::placeholder IsPlaceholder,
47+
typename PropertyListT>
48+
class accessor;
49+
50+
class queue;
51+
52+
// Property trait specializations.
53+
template <>
54+
struct is_property<ext::codeplay::experimental::property::promote_private>
55+
: std::true_type {};
56+
57+
template <>
58+
struct is_property<ext::codeplay::experimental::property::promote_local>
59+
: std::true_type {};
60+
61+
template <>
62+
struct is_property<ext::codeplay::experimental::property::no_barriers>
63+
: std::true_type {};
64+
65+
template <>
66+
struct is_property<ext::codeplay::experimental::property::force_fusion>
67+
: std::true_type {};
68+
69+
template <>
70+
struct is_property<ext::codeplay::experimental::property::queue::enable_fusion>
71+
: std::true_type {};
72+
73+
// Buffer property trait specializations
74+
template <typename T, int Dimensions, typename AllocatorT>
75+
struct is_property_of<ext::codeplay::experimental::property::promote_private,
76+
buffer<T, Dimensions, AllocatorT, void>>
77+
: std::true_type {};
78+
79+
template <typename T, int Dimensions, typename AllocatorT>
80+
struct is_property_of<ext::codeplay::experimental::property::promote_local,
81+
buffer<T, Dimensions, AllocatorT, void>>
82+
: std::true_type {};
83+
84+
// Accessor property trait specializations
85+
template <typename DataT, int Dimensions, access::mode AccessMode,
86+
access::target AccessTarget, access::placeholder IsPlaceholder,
87+
typename PropertyListT>
88+
struct is_property_of<ext::codeplay::experimental::property::promote_private,
89+
accessor<DataT, Dimensions, AccessMode, AccessTarget,
90+
IsPlaceholder, PropertyListT>> : std::true_type {
91+
};
92+
93+
template <typename DataT, int Dimensions, access::mode AccessMode,
94+
access::target AccessTarget, access::placeholder IsPlaceholder,
95+
typename PropertyListT>
96+
struct is_property_of<ext::codeplay::experimental::property::promote_local,
97+
accessor<DataT, Dimensions, AccessMode, AccessTarget,
98+
IsPlaceholder, PropertyListT>> : std::true_type {
99+
};
100+
101+
// Queue property trait specializations
102+
template <>
103+
struct is_property_of<
104+
ext::codeplay::experimental::property::queue::enable_fusion, queue>
105+
: std::true_type {};
106+
107+
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
108+
} // namespace sycl
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
//==---- fusion_wrapper.hpp --- SYCL wrapper for queue for kernel fusion ---==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <sycl/queue.hpp>
12+
13+
namespace sycl {
14+
__SYCL_INLINE_VER_NAMESPACE(_V1) {
15+
16+
namespace detail {
17+
class fusion_wrapper_impl;
18+
}
19+
20+
namespace ext {
21+
namespace codeplay {
22+
namespace experimental {
23+
24+
///
25+
/// A wrapper wrapping a sycl::queue to provide access to the kernel fusion API,
26+
/// allowing to manage kernel fusion on the wrapped queue.
27+
class __SYCL_EXPORT fusion_wrapper {
28+
29+
public:
30+
///
31+
/// Wrap a queue to get access to the kernel fusion API.
32+
///
33+
/// @throw sycl::exception with errc::invalid if trying to construct a wrapper
34+
/// on a queue which doesn't support fusion.
35+
explicit fusion_wrapper(queue &q);
36+
37+
///
38+
/// Access the queue wrapped by this fusion wrapper.
39+
queue get_queue() const;
40+
41+
///
42+
/// @brief Check whether the wrapped queue is in fusion mode or not.
43+
bool is_in_fusion_mode() const;
44+
45+
///
46+
/// @brief Set the wrapped queue into "fusion mode". This means that the
47+
/// kernels that are submitted in subsequent calls to queue::submit() are not
48+
/// submitted for execution right away, but rather added to a list of kernels
49+
/// that should be fused.
50+
///
51+
/// @throw sycl::exception with errc::invalid if this operation is called on a
52+
/// queue which is already in fusion mode.
53+
void start_fusion();
54+
55+
///
56+
/// @brief Cancel the fusion and submit all kernels submitted since the last
57+
/// start_fusion() for immediate execution without fusion. The kernels are
58+
/// executed in the same order as they were initially submitted to the wrapped
59+
/// queue.
60+
///
61+
/// This operation is asynchronous, i.e., it may return after the previously
62+
/// submitted kernels have been passed to the scheduler, but before any of the
63+
/// previously submitted kernel starts or completes execution. The events
64+
/// returned by submit() since the last call to start_fusion remain valid and
65+
/// can be used for synchronization.
66+
///
67+
/// The queue is not in "fusion mode" anymore after this calls returns, until
68+
/// the next start_fusion().
69+
void cancel_fusion();
70+
71+
///
72+
/// @brief Complete the fusion: JIT-compile a fused kernel from all kernels
73+
/// submitted to the wrapped queue since the last start_fusion and submit the
74+
/// fused kernel for execution. Inside the fused kernel, the per-work-item
75+
/// effects are executed in the same order as the kernels were initially
76+
/// submitted.
77+
///
78+
/// This operation is asynchronous, i.e., it may return after the JIT
79+
/// compilation is executed and the fused kernel is passed to the scheduler,
80+
/// but before the fused kernel starts or completes execution. The returned
81+
/// event allows to synchronize with the execution of the fused kernel. All
82+
/// events returned by queue::submit since the last call to start_fusion
83+
/// remain valid.
84+
///
85+
/// The wrapped queue is not in "fusion mode" anymore after this calls
86+
/// returns, until the next start_fusion().
87+
///
88+
/// @param properties Properties to take into account when performing fusion.
89+
event complete_fusion(const property_list &propList = {});
90+
91+
private:
92+
std::shared_ptr<detail::fusion_wrapper_impl> MImpl;
93+
};
94+
} // namespace experimental
95+
} // namespace codeplay
96+
} // namespace ext
97+
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
98+
} // namespace sycl

sycl/include/sycl/feature_test.hpp.in

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
7878
#if SYCL_BUILD_PI_HIP
7979
#define SYCL_EXT_ONEAPI_BACKEND_HIP 1
8080
#endif
81+
#cmakedefine01 SYCL_ENABLE_KERNEL_FUSION
82+
#if SYCL_ENABLE_KERNEL_FUSION
83+
#define SYCL_EXT_CODEPLAY_KERNEL_FUSION 1
84+
#endif
8185

8286
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
8387
} // namespace sycl

sycl/include/sycl/properties/all_properties.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <sycl/ext/codeplay/experimental/fusion_properties.hpp>
12
#include <sycl/properties/accessor_properties.hpp>
23
#include <sycl/properties/buffer_properties.hpp>
34
#include <sycl/properties/context_properties.hpp>

sycl/include/sycl/queue.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1251,6 +1251,14 @@ class __SYCL_EXPORT queue {
12511251
} _CODELOCFW(CodeLoc));
12521252
}
12531253

1254+
/// @brief Returns true if the queue was created with the
1255+
/// ext::codeplay::experimental::property::queue::enable_fusion property.
1256+
///
1257+
/// Equivalent to
1258+
/// `has_property<ext::codeplay::experimental::property::queue::enable_fusion>()`.
1259+
///
1260+
bool ext_codeplay_supports_fusion() const;
1261+
12541262
// Clean KERNELFUNC macros.
12551263
#undef _KERNELFUNCPARAM
12561264

sycl/include/sycl/sycl.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
#if SYCL_EXT_ONEAPI_BACKEND_LEVEL_ZERO
6060
#include <sycl/ext/oneapi/backend/level_zero.hpp>
6161
#endif
62+
#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>
6263
#include <sycl/ext/oneapi/device_global/device_global.hpp>
6364
#include <sycl/ext/oneapi/device_global/properties.hpp>
6465
#include <sycl/ext/oneapi/experimental/builtins.hpp>

sycl/source/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ set(SYCL_SOURCES
132132
"detail/error_handling/enqueue_kernel.cpp"
133133
"detail/event_impl.cpp"
134134
"detail/filter_selector_impl.cpp"
135+
"detail/fusion/fusion_wrapper.cpp"
136+
"detail/fusion/fusion_wrapper_impl.cpp"
135137
"detail/global_handler.cpp"
136138
"detail/helpers.cpp"
137139
"detail/handler_proxy.cpp"
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
//==------------ fusion_wrapper.cpp ----------------------------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>
10+
11+
#include <detail/fusion/fusion_wrapper_impl.hpp>
12+
#include <detail/queue_impl.hpp>
13+
14+
namespace sycl {
15+
__SYCL_INLINE_VER_NAMESPACE(_V1) {
16+
namespace ext {
17+
namespace codeplay {
18+
namespace experimental {
19+
20+
fusion_wrapper::fusion_wrapper(queue &Queue) {
21+
if (!Queue.ext_codeplay_supports_fusion()) {
22+
throw sycl::exception(
23+
sycl::errc::invalid,
24+
"Cannot wrap a queue for fusion which doesn't support fusion");
25+
}
26+
MImpl = std::make_shared<detail::fusion_wrapper_impl>(
27+
sycl::detail::getSyclObjImpl(Queue));
28+
}
29+
30+
queue fusion_wrapper::get_queue() const {
31+
return sycl::detail::createSyclObjFromImpl<sycl::queue>(MImpl->get_queue());
32+
}
33+
34+
bool fusion_wrapper::is_in_fusion_mode() const {
35+
return MImpl->is_in_fusion_mode();
36+
}
37+
38+
void fusion_wrapper::start_fusion() { MImpl->start_fusion(); }
39+
40+
void fusion_wrapper::cancel_fusion() { MImpl->cancel_fusion(); }
41+
42+
event fusion_wrapper::complete_fusion(const property_list &PropList) {
43+
return MImpl->complete_fusion(PropList);
44+
}
45+
46+
} // namespace experimental
47+
} // namespace codeplay
48+
} // namespace ext
49+
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
50+
} // namespace sycl
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
//==------------ fusion_wrapper.cpp ----------------------------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <detail/fusion/fusion_wrapper_impl.hpp>
10+
11+
namespace sycl {
12+
__SYCL_INLINE_VER_NAMESPACE(_V1) {
13+
namespace detail {
14+
15+
fusion_wrapper_impl::fusion_wrapper_impl(
16+
std::shared_ptr<detail::queue_impl> Queue)
17+
: MQueue{std::move(Queue)} {}
18+
19+
std::shared_ptr<detail::queue_impl> fusion_wrapper_impl::get_queue() const {
20+
return MQueue;
21+
}
22+
23+
bool fusion_wrapper_impl::is_in_fusion_mode() const { return false; }
24+
25+
void fusion_wrapper_impl::start_fusion() {
26+
throw sycl::exception(sycl::errc::feature_not_supported,
27+
"Fusion not yet implemented");
28+
}
29+
30+
void fusion_wrapper_impl::cancel_fusion() {
31+
throw sycl::exception(sycl::errc::feature_not_supported,
32+
"Fusion not yet implemented");
33+
}
34+
35+
event fusion_wrapper_impl::complete_fusion(const property_list &PropList) {
36+
(void)PropList;
37+
throw sycl::exception(sycl::errc::feature_not_supported,
38+
"Fusion not yet implemented");
39+
}
40+
41+
} // namespace detail
42+
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
43+
} // namespace sycl

0 commit comments

Comments
 (0)