Skip to content

[SYCL][Graph] Command Graph PoC #7627

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 37 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
1acf57e
Inital version of sycl graph prototype
reble Feb 18, 2022
d286c71
Adding initial sycl graph doc
reble Feb 18, 2022
656f5c3
Adding lazy execution property to queue
reble Feb 15, 2022
0bad787
fix merge
reble Feb 22, 2022
a8b5b32
Update pi_level_zero.cpp
reble Feb 22, 2022
2b50af4
update extension proposal started to incorporate feedback
reble Mar 11, 2022
047839b
typo
reble Mar 11, 2022
f957996
fix typos and syntax issues
reble May 3, 2022
0d8a5f4
Apply suggestions from code review
reble Mar 14, 2022
50d49a1
Propagate lazy queue property
julianmi May 3, 2022
9b46c4b
fix formatting issues
reble May 6, 2022
7d81618
fix issue introd. by recent merge
reble May 6, 2022
7917086
fix formatting
reble May 10, 2022
a3164de
update API to recent proposal
reble Oct 12, 2022
8850b18
fix rebase issue
reble Oct 12, 2022
446ac53
revert changes to level-zero plugin
reble Oct 18, 2022
fa7494d
starting to rework lazy execution logic
reble Oct 18, 2022
7581915
bugfix
reble Oct 18, 2022
38da3c6
add basic tests
reble Oct 18, 2022
fa58aa3
renaming macro and bugfix
reble Oct 20, 2022
4478390
clang-format
reble Nov 1, 2022
383459c
Renaming variables
reble Nov 1, 2022
f71ea49
Common changes from record & replay API (#32)
EwanC Nov 21, 2022
df971e5
[SYCL] Minor graph classes refactor (#36)
Bensuo Nov 24, 2022
2cf9d0f
Cosmetic changes
reble Nov 30, 2022
9f127d7
[SYCL] Repro for reduction fail
EwanC Nov 18, 2022
578692f
[SYCL] PIMPL refactor
EwanC Nov 24, 2022
7bb11ce
[SYCL] Use handler to execute graph
EwanC Nov 30, 2022
3073cfc
[SYCL] Clean-up lazy queue PI changes
EwanC Dec 2, 2022
c99bdca
[SYCL] Fix reductions not working inside graph
Bensuo Dec 13, 2022
1448cb5
[SYCL] Enable submitting sub-graphs
Bensuo Dec 21, 2022
fb28d59
[SYCL] Rename exec_graph to ext_oneapi_graph
Bensuo Jan 9, 2023
4a306ed
[SYCL] Add unit tests for command graph POC
Bensuo Jan 11, 2023
1249fbc
[SYCL] Pass property_list to APIs
EwanC Jan 19, 2023
0ac7a7e
Adding new example using make edge function (#63)
reble Jan 19, 2023
06c588f
Apply suggestions from code review
reble Feb 9, 2023
d4c1ed3
[SYCL] Record & Replay Implementation
EwanC Feb 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,10 @@
// 10.13 Added new PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS queue property.
// 10.14 Add PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY as an extension for
// piDeviceGetInfo.
// 10.15 Add new PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION queue property

#define _PI_H_VERSION_MAJOR 10
#define _PI_H_VERSION_MINOR 14
#define _PI_H_VERSION_MINOR 15

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -569,6 +570,14 @@ constexpr pi_queue_properties PI_QUEUE_PROFILING_ENABLE = (1 << 1);
constexpr pi_queue_properties PI_QUEUE_ON_DEVICE = (1 << 2);
constexpr pi_queue_properties PI_QUEUE_ON_DEVICE_DEFAULT = (1 << 3);
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS = (1 << 4);
// In a lazy queue, enqueued commands are not submitted for execution
// immediately, instead they are submitted for execution once the queue is
// flushed.
//
// This is to enable prototyping of the SYCL_EXT_ONEAPI_GRAPH extension,
// before a native command-list interface in PI can be designed and
// implemented.
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION = (1 << 5);

using pi_result = _pi_result;
using pi_platform_info = _pi_platform_info;
Expand Down
3 changes: 2 additions & 1 deletion sycl/include/sycl/detail/property_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ enum DataLessPropKind {
UseDefaultStream = 8,
DiscardEvents = 9,
DeviceReadOnly = 10,
LazyExecution = 11,
// Indicates the last known dataless property.
LastKnownDataLessPropKind = 10,
LastKnownDataLessPropKind = 11,
// Exceeding 32 may cause ABI breaking change on some of OSes.
DataLessPropKindSize = 32
};
Expand Down
144 changes: 144 additions & 0 deletions sycl/include/sycl/ext/oneapi/experimental/graph.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
//==--------- graph.hpp --- SYCL graph extension ---------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <memory>
#include <vector>

#include <sycl/detail/common.hpp>
#include <sycl/detail/defines_elementary.hpp>
#include <sycl/property_list.hpp>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {

class handler;
class queue;
namespace ext {
namespace oneapi {
namespace experimental {

namespace detail {
struct node_impl;
struct graph_impl;

using node_ptr = std::shared_ptr<node_impl>;
using graph_ptr = std::shared_ptr<graph_impl>;
} // namespace detail

enum class graph_state {
modifiable,
executable,
};

class __SYCL_EXPORT node {
private:
node(detail::node_ptr Impl) : impl(Impl) {}

template <class Obj>
friend decltype(Obj::impl)
sycl::detail::getSyclObjImpl(const Obj &SyclObject);
template <class T>
friend T sycl::detail::createSyclObjFromImpl(decltype(T::impl) ImplObj);

detail::node_ptr impl;
detail::graph_ptr MGraph;
};

template <graph_state State = graph_state::modifiable>
class __SYCL_EXPORT command_graph {
public:
command_graph(const property_list &propList = {});

// Adding empty node with [0..n] predecessors:
node add(const std::vector<node> &dep = {});

// Adding device node:
template <typename T> node add(T cgf, const std::vector<node> &dep = {}) {
return add_impl(cgf, dep);
}

// Adding dependency between two nodes.
void make_edge(node sender, node receiver);

command_graph<graph_state::executable>
finalize(const sycl::context &syclContext,
const property_list &propList = {}) const;

/// Change the state of a queue to be recording and associate this graph with
/// it.
/// @param recordingQueue The queue to change state on and associate this
/// graph with.
/// @return True if the queue had its state changed from executing to
/// recording.
bool begin_recording(queue recordingQueue);

/// Change the state of multiple queues to be recording and associate this
/// graph with each of them.
/// @param recordingQueues The queues to change state on and associate this
/// graph with.
/// @return True if any queue had its state changed from executing to
/// recording.
bool begin_recording(const std::vector<queue> &recordingQueues);

/// Set all queues currently recording to this graph to the executing state.
/// @return True if any queue had its state changed from recording to
/// executing.
bool end_recording();

/// Set a queues currently recording to this graph to the executing state.
/// @param recordingQueue The queue to change state on.
/// @return True if the queue had its state changed from recording to
/// executing.
bool end_recording(queue recordingQueue);

/// Set multiple queues currently recording to this graph to the executing
/// state.
/// @param recordingQueue The queues to change state on.
/// @return True if any queue had its state changed from recording to
/// executing.
bool end_recording(const std::vector<queue> &recordingQueues);

private:
command_graph(detail::graph_ptr Impl) : impl(Impl) {}

// Template-less implementation of add()
node add_impl(std::function<void(handler &)> cgf,
const std::vector<node> &dep);

template <class Obj>
friend decltype(Obj::impl)
sycl::detail::getSyclObjImpl(const Obj &SyclObject);
template <class T>
friend T sycl::detail::createSyclObjFromImpl(decltype(T::impl) ImplObj);

detail::graph_ptr impl;
};

template <> class __SYCL_EXPORT command_graph<graph_state::executable> {
public:
command_graph() = delete;

command_graph(detail::graph_ptr g, const sycl::context &ctx)
: MTag(rand()), MCtx(ctx), impl(g) {}

private:
template <class Obj>
friend decltype(Obj::impl)
sycl::detail::getSyclObjImpl(const Obj &SyclObject);

int MTag;
const sycl::context &MCtx;
detail::graph_ptr impl;
};
} // namespace experimental
} // namespace oneapi
} // namespace ext
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
1 change: 1 addition & 0 deletions sycl/include/sycl/feature_test.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
#define SYCL_EXT_ONEAPI_FREE_FUNCTION_QUERIES 1
#define SYCL_EXT_ONEAPI_GROUP_ALGORITHMS 1
#define SYCL_EXT_ONEAPI_GROUP_SORT 1
#define SYCL_EXT_ONEAPI_LAZY_QUEUE 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is not intended for use by the user, is there a reason to expose it as a feature?

#define SYCL_EXT_ONEAPI_MAX_WORK_GROUP_QUERY 1
#define SYCL_EXT_ONEAPI_ND_RANGE_REDUCTIONS 1
#define SYCL_EXT_ONEAPI_DEFAULT_CONTEXT 1
Expand Down
9 changes: 9 additions & 0 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
#include <sycl/sampler.hpp>
#include <sycl/stl.hpp>

#include <sycl/ext/oneapi/experimental/graph.hpp>

#include <functional>
#include <limits>
#include <memory>
Expand Down Expand Up @@ -2516,6 +2518,13 @@ class __SYCL_EXPORT handler {
/// \param Advice is a device-defined advice for the specified allocation.
void mem_advise(const void *Ptr, size_t Length, int Advice);

/// Executes a command_graph.
///
/// \param Graph Executable command_graph to run
void ext_oneapi_graph(ext::oneapi::experimental::command_graph<
ext::oneapi::experimental::graph_state::executable>
Graph);

private:
std::shared_ptr<detail::handler_impl> MImpl;
std::shared_ptr<detail::queue_impl> MQueue;
Expand Down
8 changes: 8 additions & 0 deletions sycl/include/sycl/properties/queue_properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ namespace property {
namespace queue {
class discard_events
: public ::sycl::detail::DataLessProperty<::sycl::detail::DiscardEvents> {};
class lazy_execution
: public ::sycl::detail::DataLessProperty<::sycl::detail::LazyExecution> {};
} // namespace queue
} // namespace property

Expand Down Expand Up @@ -65,6 +67,9 @@ template <>
struct is_property<ext::oneapi::property::queue::discard_events>
: std::true_type {};
template <>
struct is_property<ext::oneapi::property::queue::lazy_execution>
: std::true_type {};
template <>
struct is_property<property::queue::cuda::use_default_stream> : std::true_type {
};
template <>
Expand All @@ -80,6 +85,9 @@ template <>
struct is_property_of<ext::oneapi::property::queue::discard_events, queue>
: std::true_type {};
template <>
struct is_property_of<ext::oneapi::property::queue::lazy_execution, queue>
: std::true_type {};
template <>
struct is_property_of<property::queue::cuda::use_default_stream, queue>
: std::true_type {};
template <>
Expand Down
58 changes: 58 additions & 0 deletions sycl/include/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <sycl/property_list.hpp>
#include <sycl/stl.hpp>


// Explicitly request format macros
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS 1
Expand Down Expand Up @@ -72,6 +73,16 @@ static event submitAssertCapture(queue &, event &, queue *,
#endif
} // namespace detail

namespace ext {
namespace oneapi {
namespace experimental {
// State of a queue with regards to graph recording,
// returned by info::queue::state
enum class queue_state { executing, recording };
} // namespace experimental
} // namespace oneapi
} // namespace ext

/// Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
///
/// A SYCL queue can be used to submit command groups to be executed by the SYCL
Expand Down Expand Up @@ -1058,6 +1069,53 @@ class __SYCL_EXPORT queue {
// Clean KERNELFUNC macros.
#undef _KERNELFUNCPARAM

/// Shortcut for executing a graph of commands.
///
/// \param Graph the graph of commands to execute
/// \return an event representing graph execution operation.
event ext_oneapi_graph(ext::oneapi::experimental::command_graph<
ext::oneapi::experimental::graph_state::executable>
Graph) {
const detail::code_location CodeLoc = {};
return submit([&](handler &CGH) { CGH.ext_oneapi_graph(Graph); }, CodeLoc);
}

/// Shortcut for executing a graph of commands.
///
/// \param Graph the graph of commands to execute
/// \param DepEvent is an event that specifies the graph execution
/// dependencies.
/// \return an event representing graph execution operation.
event ext_oneapi_graph(ext::oneapi::experimental::command_graph<
ext::oneapi::experimental::graph_state::executable>
Graph,
event DepEvent _CODELOCPARAM(&CodeLoc)) {
return submit(
[&](handler &CGH) {
CGH.depends_on(DepEvent);
CGH.ext_oneapi_graph(Graph);
} _CODELOCFW(CodeLoc));
}

/// Shortcut for executing a graph of commands.
///
/// \param Graph the graph of commands to execute
/// \param DepEvents is a vector of events that specifies the graph
/// execution dependencies.
/// \return an event representing graph execution operation.
event ext_oneapi_graph(ext::oneapi::experimental::command_graph<
ext::oneapi::experimental::graph_state::executable>
Graph,
const std::vector<event> &DepEvents) {
const detail::code_location CodeLoc = {};
return submit(
[&](handler &CGH) {
CGH.depends_on(DepEvents);
CGH.ext_oneapi_graph(Graph);
},
CodeLoc);
}

/// Returns whether the queue is in order or OoO
///
/// Equivalent to has_property<property::queue::in_order>()
Expand Down
Loading