Skip to content

[SYCL][Graph] Backend integration and feature additions for SYCL Graphs (3/4) #10033

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 37 commits into from
Jul 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
2ae2368
[SYCL][Graphs] Squashed commit of remaining runtime changes
Bensuo Jun 22, 2023
4f9ea64
[SYCL][Graph] Remove unused CommandType value
Bensuo Jun 22, 2023
67475ca
[SYCL][Graph] Revert change to isInteropHostTask
Bensuo Jun 22, 2023
b4c24ba
[SYCL][Graph] Fix formatting in graph_impl.hpp
Bensuo Jun 22, 2023
be7c202
Merge pull request #237 from reble/julianmi/fix-double-root-nodes
julianmi Jun 26, 2023
3d01d48
Expand unittests
julianmi Jun 27, 2023
8c88084
[SYCL][Graph] Support for read and write for one-dimensional and 2d b…
mfrancepillois Jun 27, 2023
8578f21
[SYCL] Fix clang build (#242)
EwanC Jun 27, 2023
4a09be0
Replace `RT` namespace with `sycl::detail::pi`
EwanC Jun 29, 2023
d170f32
Fix clang-10 compilation issue
EwanC Jun 29, 2023
727b234
Restore scheduler regression test
EwanC Jun 29, 2023
e7c8edd
Merge branch 'sycl' into sycl-graph-patch-3
EwanC Jun 29, 2023
0b9c175
Merge branch 'sycl' into sycl-graph-patch-3
EwanC Jul 4, 2023
4d780af
Remove std::move on temporary
EwanC Jul 4, 2023
6e4d3f9
Fix issues from merge
EwanC Jul 4, 2023
8951bca
Fix clang-format issue
EwanC Jul 4, 2023
64b8eb9
Update windows symbols test
EwanC Jul 5, 2023
65e8143
Fixup PI inaccuracies
EwanC Jul 5, 2023
0dfdf4f
Missing offsets in Level Zero command-buffr buffer copy
EwanC Jul 5, 2023
63cb32c
Remove characters incorrectly copy/pasted
EwanC Jul 5, 2023
f98fd78
Merge branch 'sycl' into sycl-graph-patch-3
EwanC Jul 5, 2023
cb05754
Report "ur_exp_command_buffer" as Level Zero device extension
EwanC Jul 5, 2023
f6a9b16
[SYCL][Graph] Fix in_order queue with empty nodes
mfrancepillois Jul 5, 2023
21ae184
Fixup formatting and doxy comments
EwanC Jul 5, 2023
de12fa0
Fix clang-format
EwanC Jul 5, 2023
911a67d
Merge branch 'sycl' into sycl-graph-patch-3
EwanC Jul 6, 2023
732cb04
clang-format pi_level_zero.cpp
EwanC Jul 6, 2023
4bcf0ff
[SYCL][Graph] Fix In-order queue unitests single context bug
mfrancepillois Jul 6, 2023
3ee7743
[SYCL][Graph] Implement exeption throwing for inconsistent context or…
mfrancepillois Jul 6, 2023
e614936
[SYCL][Graph] Add exceptions on invalid event and queue usage
Bensuo Jul 6, 2023
547fdac
Update Windows Symbols
EwanC Jul 7, 2023
eadc0c1
Update unittest `CommandGraphTest.BeginEndRecording`
EwanC Jul 7, 2023
b054c63
Merge branch 'sycl' into sycl-graph-patch-3
EwanC Jul 11, 2023
74e7203
[SYCL] Change `graph_support_level` namespace (#256)
EwanC Jul 12, 2023
58efbfd
[SYCL][Graph] Address reviewer feedback
Bensuo Jul 12, 2023
727972b
Early return in command-buffer memcpy commands
EwanC Jul 17, 2023
6098a9c
Merge branch 'sycl' into sycl-graph-patch-3
EwanC Jul 17, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions sycl/include/sycl/detail/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,17 @@ class CGCopyFromDeviceGlobal : public CG {
size_t getOffset() { return MOffset; }
};

/// "Execute command-buffer" command group class.
class CGExecCommandBuffer : public CG {
public:
sycl::detail::pi::PiExtCommandBuffer MCommandBuffer;

CGExecCommandBuffer(sycl::detail::pi::PiExtCommandBuffer CommandBuffer,
CG::StorageInitHelper CGData)
: CG(CGTYPE::ExecCommandBuffer, std::move(CGData)),
MCommandBuffer(CommandBuffer) {}
};

} // namespace detail
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
2 changes: 1 addition & 1 deletion sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -2284,7 +2284,7 @@ __SYCL_EXPORT pi_result piextCommandBufferMemBufferCopyRect(
/// \param sync_point The sync_point associated with this memory operation.
__SYCL_EXPORT pi_result piextCommandBufferMemBufferRead(
pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset,
size_t size, void *dst, pi_uint32 num_events_in_wait_list,
size_t size, void *dst, pi_uint32 num_sync_points_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point);

Expand Down
7 changes: 4 additions & 3 deletions sycl/include/sycl/ext/oneapi/experimental/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,6 @@ class __SYCL_EXPORT executable_command_graph {

int MTag;
std::shared_ptr<detail::exec_graph_impl> impl;

friend class modifiable_command_graph;
};
} // namespace detail

Expand All @@ -248,7 +246,10 @@ class command_graph : public detail::modifiable_command_graph {
template <>
class command_graph<graph_state::executable>
: public detail::executable_command_graph {
private:

protected:
friend command_graph<graph_state::executable>
detail::modifiable_command_graph::finalize(const sycl::property_list &) const;
using detail::executable_command_graph::executable_command_graph;
};

Expand Down
6 changes: 6 additions & 0 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1542,6 +1542,12 @@ class __SYCL_EXPORT handler {
setType(detail::CG::CodeplayHostTask);
}

/// @brief Get the command graph if any associated with this handler. It can
/// come from either the associated queue or from being set explicitly through
/// the appropriate constructor.
std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
getCommandGraph() const;

public:
handler(const handler &) = delete;
handler(handler &&) = delete;
Expand Down
4 changes: 4 additions & 0 deletions sycl/include/sycl/info/ext_oneapi_device_traits.def
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ __SYCL_PARAM_TRAITS_TEMPLATE_SPEC(ext::oneapi::experimental,device, max_work_gro
__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, device, architecture,
ext::oneapi::experimental::architecture,
PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION)
__SYCL_PARAM_TRAITS_SPEC(
ext::oneapi::experimental, device, graph_support,
ext::oneapi::experimental::info::graph_support_level,
0 /* No PI device code needed */)
#ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF
Expand Down
9 changes: 7 additions & 2 deletions sycl/include/sycl/info/info_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,14 @@ template <typename T, T param> struct compatibility_param_traits {};
} /*namespace info */ \
} /*namespace Namespace */

namespace ext::oneapi::experimental::info::device {
namespace ext::oneapi::experimental::info {

enum class graph_support_level { unsupported = 0, native, emulated };

namespace device {
template <int Dimensions> struct max_work_groups;
} // namespace ext::oneapi::experimental::info::device
} // namespace device
} // namespace ext::oneapi::experimental::info
#include <sycl/info/ext_codeplay_device_traits.def>
#include <sycl/info/ext_intel_device_traits.def>
#include <sycl/info/ext_oneapi_device_traits.def>
Expand Down
13 changes: 13 additions & 0 deletions sycl/include/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,16 @@ static event submitAssertCapture(queue &, event &, queue *,
#endif
} // namespace detail

namespace ext {
namespace oneapi {
namespace experimental {
// State of a queue with regards to graph recording,
// returned by info::queue::state
enum class queue_state { executing, recording };
} // namespace experimental
} // namespace oneapi
} // namespace ext

/// Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
///
/// A SYCL queue can be used to submit command groups to be executed by the SYCL
Expand Down Expand Up @@ -283,6 +293,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
/// \return SYCL device this queue was constructed with.
device get_device() const;

/// \return State the queue is currently in.
ext::oneapi::experimental::queue_state ext_oneapi_get_state() const;

/// \return true if this queue is a SYCL host queue.
__SYCL2020_DEPRECATED(
"is_host() is deprecated as the host device is no longer supported.")
Expand Down
32 changes: 16 additions & 16 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1118,46 +1118,46 @@ pi_result piextCommandBufferMemBufferCopyRect(

pi_result piextCommandBufferMemBufferRead(
pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset,
size_t Size, void *Dst, pi_uint32 NumEventsInWaitList,
size_t Size, void *Dst, pi_uint32 NumSyncPointsInWaitList,
const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) {
return pi2ur::piextCommandBufferMemBufferRead(CommandBuffer, Buffer, Offset,
Size, Dst, NumEventsInWaitList,
SyncPointWaitList, SyncPoint);
return pi2ur::piextCommandBufferMemBufferRead(
CommandBuffer, Buffer, Offset, Size, Dst, NumSyncPointsInWaitList,
SyncPointWaitList, SyncPoint);
}

pi_result piextCommandBufferMemBufferReadRect(
pi_ext_command_buffer CommandBuffer, pi_mem Buffer,
pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset,
pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch,
size_t HostRowPitch, size_t HostSlicePitch, void *Ptr,
pi_uint32 NumEventsInWaitList, const pi_ext_sync_point *SyncPointWaitList,
pi_ext_sync_point *SyncPoint) {
pi_uint32 NumSyncPointsInWaitList,
const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) {
return pi2ur::piextCommandBufferMemBufferReadRect(
CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch,
BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, NumEventsInWaitList,
SyncPointWaitList, SyncPoint);
BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr,
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
}

pi_result piextCommandBufferMemBufferWrite(
pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset,
size_t Size, const void *Ptr, pi_uint32 NumEventsInWaitList,
size_t Size, const void *Ptr, pi_uint32 NumSyncPointsInWaitList,
const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) {
return pi2ur::piextCommandBufferMemBufferWrite(CommandBuffer, Buffer, Offset,
Size, Ptr, NumEventsInWaitList,
SyncPointWaitList, SyncPoint);
return pi2ur::piextCommandBufferMemBufferWrite(
CommandBuffer, Buffer, Offset, Size, Ptr, NumSyncPointsInWaitList,
SyncPointWaitList, SyncPoint);
}

pi_result piextCommandBufferMemBufferWriteRect(
pi_ext_command_buffer CommandBuffer, pi_mem Buffer,
pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset,
pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch,
size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr,
pi_uint32 NumEventsInWaitList, const pi_ext_sync_point *SyncPointWaitList,
pi_ext_sync_point *SyncPoint) {
pi_uint32 NumSyncPointsInWaitList,
const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) {
return pi2ur::piextCommandBufferMemBufferWriteRect(
CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch,
BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, NumEventsInWaitList,
SyncPointWaitList, SyncPoint);
BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr,
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
}

pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer,
Expand Down
6 changes: 6 additions & 0 deletions sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2726,6 +2726,12 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_CL(piextCommandBufferMemBufferCopy, piextCommandBufferMemBufferCopy)
_PI_CL(piextCommandBufferMemBufferCopyRect,
piextCommandBufferMemBufferCopyRect)
_PI_CL(piextCommandBufferMemBufferRead, piextCommandBufferMemBufferRead)
_PI_CL(piextCommandBufferMemBufferReadRect,
piextCommandBufferMemBufferReadRect)
_PI_CL(piextCommandBufferMemBufferWrite, piextCommandBufferMemBufferWrite)
_PI_CL(piextCommandBufferMemBufferWriteRect,
piextCommandBufferMemBufferWriteRect)
_PI_CL(piextEnqueueCommandBuffer, piextEnqueueCommandBuffer)

_PI_CL(piextKernelSetArgMemObj, piextKernelSetArgMemObj)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -535,9 +535,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp(
uint32_t NumSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *SyncPointWaitList,
ur_exp_command_buffer_sync_point_t *SyncPoint) {
(void)SrcOffset;
(void)DstOffset;

auto SrcBuffer = ur_cast<ur_mem_handle_t>(SrcMem);
auto DstBuffer = ur_cast<ur_mem_handle_t>(DstMem);

Expand All @@ -553,8 +550,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp(
CommandBuffer->Device));

return enqueueCommandBufferMemCopyHelper(
UR_COMMAND_MEM_BUFFER_COPY, CommandBuffer, ZeHandleDst, ZeHandleSrc, Size,
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
UR_COMMAND_MEM_BUFFER_COPY, CommandBuffer, ZeHandleDst + DstOffset,
ZeHandleSrc + SrcOffset, Size, NumSyncPointsInWaitList, SyncPointWaitList,
SyncPoint);
}

UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
(Device->ZeDeviceProperties->deviceId & 0xff0) == 0xbd0)
SupportedExtensions += ("cl_intel_bfloat16_conversions ");

// Return supported for the UR command-buffer experimental feature
SupportedExtensions += ("ur_exp_command_buffer ");

return ReturnValue(SupportedExtensions.c_str());
}
case UR_DEVICE_INFO_NAME:
Expand Down
36 changes: 36 additions & 0 deletions sycl/source/detail/device_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,35 @@ struct get_device_info_impl<
}
};

// Specialization for graph extension support
template <>
struct get_device_info_impl<
ext::oneapi::experimental::info::graph_support_level,
ext::oneapi::experimental::info::device::graph_support> {
static ext::oneapi::experimental::info::graph_support_level
get(const DeviceImplPtr &Dev) {
size_t ResultSize = 0;
Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
Dev->getHandleRef(), PI_DEVICE_INFO_EXTENSIONS, 0, nullptr,
&ResultSize);
if (ResultSize == 0)
return ext::oneapi::experimental::info::graph_support_level::unsupported;

std::unique_ptr<char[]> Result(new char[ResultSize]);
Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
Dev->getHandleRef(), PI_DEVICE_INFO_EXTENSIONS, ResultSize,
Result.get(), nullptr);

std::string_view ExtensionsString(Result.get());
bool CmdBufferSupport =
ExtensionsString.find("ur_exp_command_buffer") != std::string::npos;
return CmdBufferSupport
? ext::oneapi::experimental::info::graph_support_level::native
: ext::oneapi::experimental::info::graph_support_level::
unsupported;
}
};

template <typename Param>
typename Param::return_type get_device_info(const DeviceImplPtr &Dev) {
static_assert(is_device_info_desc<Param>::value,
Expand Down Expand Up @@ -1778,6 +1807,13 @@ inline uint32_t get_device_info_host<
PI_ERROR_INVALID_DEVICE);
}

template <>
inline ext::oneapi::experimental::info::graph_support_level
get_device_info_host<ext::oneapi::experimental::info::device::graph_support>() {
// No support for graphs on the host device.
return ext::oneapi::experimental::info::graph_support_level::unsupported;
}

} // namespace detail
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
6 changes: 6 additions & 0 deletions sycl/source/detail/event_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,12 @@ void event_impl::wait(std::shared_ptr<sycl::detail::event_impl> Self) {
throw sycl::exception(make_error_code(errc::invalid),
"wait method cannot be used for a discarded event.");

if (MGraph.lock()) {
throw sycl::exception(make_error_code(errc::invalid),
"wait method cannot be used for an event associated "
"with a command graph.");
}

#ifdef XPTI_ENABLE_INSTRUMENTATION
void *TelemetryEvent = nullptr;
uint64_t IId;
Expand Down
31 changes: 31 additions & 0 deletions sycl/source/detail/event_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {
namespace ext::oneapi::experimental::detail {
class graph_impl;
}
class context;
namespace detail {
class plugin;
Expand Down Expand Up @@ -256,6 +259,25 @@ class event_impl {
return MContext;
}

// Sets a sync point which is used when this event represents an enqueue to a
// Command Bufferr.
void setSyncPoint(sycl::detail::pi::PiExtSyncPoint SyncPoint) {
MSyncPoint = SyncPoint;
}

// Get the sync point associated with this event.
sycl::detail::pi::PiExtSyncPoint getSyncPoint() const { return MSyncPoint; }

void setCommandGraph(
std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph) {
MGraph = Graph;
}

std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
getCommandGraph() const {
return MGraph.lock();
}

protected:
// When instrumentation is enabled emits trace event for event wait begin and
// returns the telemetry event generated for the wait
Expand Down Expand Up @@ -302,6 +324,15 @@ class event_impl {
std::mutex MMutex;
std::condition_variable cv;

/// Store the command graph associated with this event, if any.
/// This event is also be stored in the graph so a weak_ptr is used.
std::weak_ptr<ext::oneapi::experimental::detail::graph_impl> MGraph;

// If this event represents a submission to a
// sycl::detail::pi::PiExtCommandBuffer the sync point for that submission is
// stored here.
sycl::detail::pi::PiExtSyncPoint MSyncPoint;

friend std::vector<sycl::detail::pi::PiEvent>
getOrWaitEvents(std::vector<sycl::event> DepEvents,
std::shared_ptr<sycl::detail::context_impl> Context);
Expand Down
Loading