Skip to content

Commit 30d9ae7

Browse files
committed
[SYCL][Fusion] Scheduler support for kernel fusion
Signed-off-by: Lukas Sommer <[email protected]>
1 parent 9dc14a2 commit 30d9ae7

File tree

11 files changed

+723
-82
lines changed

11 files changed

+723
-82
lines changed

sycl/source/detail/fusion/fusion_wrapper_impl.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
#include <detail/fusion/fusion_wrapper_impl.hpp>
1010

11+
#include <detail/scheduler/scheduler.hpp>
12+
1113
namespace sycl {
1214
__SYCL_INLINE_VER_NAMESPACE(_V1) {
1315
namespace detail {
@@ -20,22 +22,22 @@ std::shared_ptr<detail::queue_impl> fusion_wrapper_impl::get_queue() const {
2022
return MQueue;
2123
}
2224

23-
bool fusion_wrapper_impl::is_in_fusion_mode() const { return false; }
25+
bool fusion_wrapper_impl::is_in_fusion_mode() const {
26+
return MQueue->is_in_fusion_mode();
27+
}
2428

2529
void fusion_wrapper_impl::start_fusion() {
26-
throw sycl::exception(sycl::errc::feature_not_supported,
27-
"Fusion not yet implemented");
30+
detail::Scheduler::getInstance().startFusion(MQueue);
2831
}
2932

3033
void fusion_wrapper_impl::cancel_fusion() {
31-
throw sycl::exception(sycl::errc::feature_not_supported,
32-
"Fusion not yet implemented");
34+
detail::Scheduler::getInstance().cancelFusion(MQueue);
3335
}
3436

3537
event fusion_wrapper_impl::complete_fusion(const property_list &PropList) {
36-
(void)PropList;
37-
throw sycl::exception(sycl::errc::feature_not_supported,
38-
"Fusion not yet implemented");
38+
auto EventImpl =
39+
detail::Scheduler::getInstance().completeFusion(MQueue, PropList);
40+
return detail::createSyclObjFromImpl<event>(EventImpl);
3941
}
4042

4143
} // namespace detail

sycl/source/detail/helpers.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include <detail/scheduler/commands.hpp>
910
#include <sycl/detail/helpers.hpp>
1011

1112
#include <detail/context_impl.hpp>
@@ -30,9 +31,16 @@ std::vector<RT::PiEvent> getOrWaitEvents(std::vector<sycl::event> DepEvents,
3031
!SyclEventImplPtr->is_host()) {
3132
continue;
3233
}
34+
// The fusion command and its event are associated with a non-host context,
35+
// but still does not produce a PI event,.
36+
bool NoPiEvent =
37+
SyclEventImplPtr->MCommand &&
38+
!static_cast<Command *>(SyclEventImplPtr->MCommand)->producesPiEvent();
3339
if (SyclEventImplPtr->is_host() ||
34-
SyclEventImplPtr->getContextImpl() != Context) {
35-
SyclEventImplPtr->waitInternal();
40+
SyclEventImplPtr->getContextImpl() != Context || NoPiEvent) {
41+
// Call wait, because the command for the event might not have been
42+
// enqueued when kernel fusion is happening.
43+
SyclEventImplPtr->wait(SyclEventImplPtr);
3644
} else {
3745
Events.push_back(SyclEventImplPtr->getHandleRef());
3846
}

sycl/source/detail/queue_impl.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,15 @@ class queue_impl {
498498

499499
bool ext_oneapi_empty() const;
500500

501+
/// Check whether the queue is in fusion mode.
502+
///
503+
/// \return true if the queue is in fusion mode, false otherwise.
504+
bool is_in_fusion_mode() {
505+
return detail::Scheduler::getInstance().isInFusionMode(
506+
std::hash<typename std::shared_ptr<queue_impl>::element_type *>()(
507+
this));
508+
}
509+
501510
protected:
502511
// template is needed for proper unit testing
503512
template <typename HandlerType = handler>

sycl/source/detail/scheduler/commands.cpp

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,8 @@ static std::string commandToNodeType(Command::CommandType Type) {
168168
return "host_acc_create_buffer_lock_node";
169169
case Command::CommandType::EMPTY_TASK:
170170
return "host_acc_destroy_buffer_release_node";
171+
case Command::CommandType::FUSION:
172+
return "kernel_fusion_placeholder_node";
171173
default:
172174
return "unknown_node";
173175
}
@@ -196,6 +198,8 @@ static std::string commandToName(Command::CommandType Type) {
196198
return "Host Accessor Creation/Buffer Lock";
197199
case Command::CommandType::EMPTY_TASK:
198200
return "Host Accessor Destruction/Buffer Lock Release";
201+
case Command::CommandType::FUSION:
202+
return "Kernel Fusion Placeholder";
199203
default:
200204
return "Unknown Action";
201205
}
@@ -2586,6 +2590,125 @@ bool ExecCGCommand::readyForCleanup() const {
25862590
return MLeafCounter == 0 && MEvent->isCompleted();
25872591
return Command::readyForCleanup();
25882592
}
2593+
2594+
KernelFusionCommand::KernelFusionCommand(QueueImplPtr Queue)
2595+
: Command(Command::CommandType::FUSION, Queue),
2596+
MStatus(FusionStatus::ACTIVE) {
2597+
emitInstrumentationDataProxy();
2598+
}
2599+
2600+
std::vector<Command *> &KernelFusionCommand::auxiliaryCommands() {
2601+
return MAuxiliaryCommands;
2602+
}
2603+
2604+
void KernelFusionCommand::addToFusionList(ExecCGCommand *Kernel) {
2605+
MFusionList.push_back(Kernel);
2606+
}
2607+
2608+
std::vector<ExecCGCommand *> &KernelFusionCommand::getFusionList() {
2609+
return MFusionList;
2610+
}
2611+
2612+
bool KernelFusionCommand::producesPiEvent() const { return false; }
2613+
2614+
pi_int32 KernelFusionCommand::enqueueImp() {
2615+
waitForPreparedHostEvents();
2616+
waitForEvents(MQueue, MPreparedDepsEvents, MEvent->getHandleRef());
2617+
2618+
return PI_SUCCESS;
2619+
}
2620+
2621+
void KernelFusionCommand::setFusionStatus(FusionStatus Status) {
2622+
MStatus = Status;
2623+
}
2624+
2625+
void KernelFusionCommand::emitInstrumentationData() {
2626+
#ifdef XPTI_ENABLE_INSTRUMENTATION
2627+
if (!xptiTraceEnabled()) {
2628+
return;
2629+
}
2630+
// Create a payload with the command name and an event using this payload to
2631+
// emit a node_create
2632+
MCommandNodeType = commandToNodeType(MType);
2633+
MCommandName = commandToName(MType);
2634+
2635+
static unsigned FusionNodeCount = 0;
2636+
std::stringstream PayloadStr;
2637+
PayloadStr << "Fusion command #" << FusionNodeCount++;
2638+
xpti::payload_t Payload = xpti::payload_t(PayloadStr.str().c_str());
2639+
2640+
uint64_t CommandInstanceNo = 0;
2641+
xpti_td *CmdTraceEvent =
2642+
xptiMakeEvent(MCommandName.c_str(), &Payload, xpti::trace_graph_event,
2643+
xpti_at::active, &CommandInstanceNo);
2644+
2645+
MInstanceID = CommandInstanceNo;
2646+
if (CmdTraceEvent) {
2647+
MTraceEvent = static_cast<void *>(CmdTraceEvent);
2648+
// If we are seeing this event again, then the instance ID
2649+
// will be greater
2650+
// than 1; in this case, we must skip sending a
2651+
// notification to create a node as this node has already
2652+
// been created. We return this value so the epilog method
2653+
// can be called selectively.
2654+
// See makeTraceEventProlog.
2655+
MFirstInstance = (CommandInstanceNo == 1);
2656+
}
2657+
2658+
// This function is called in the constructor of the command. At this point
2659+
// the kernel fusion list is still empty, so we don't have a terrible lot of
2660+
// information we could attach to this node here.
2661+
if (MFirstInstance && CmdTraceEvent) {
2662+
xpti::addMetadata(CmdTraceEvent, "sycl_device",
2663+
deviceToID(MQueue->get_device()));
2664+
xpti::addMetadata(CmdTraceEvent, "sycl_device_type",
2665+
deviceToString(MQueue->get_device()));
2666+
xpti::addMetadata(CmdTraceEvent, "sycl_device_name",
2667+
getSyclObjImpl(MQueue->get_device())->getDeviceName());
2668+
}
2669+
2670+
if (MFirstInstance) {
2671+
xptiNotifySubscribers(MStreamID, xpti::trace_node_create,
2672+
detail::GSYCLGraphEvent,
2673+
static_cast<xpti_td *>(MTraceEvent), MInstanceID,
2674+
static_cast<const void *>(MCommandNodeType.c_str()));
2675+
}
2676+
2677+
#endif
2678+
}
2679+
2680+
void KernelFusionCommand::printDot(std::ostream &Stream) const {
2681+
Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\"";
2682+
2683+
Stream << "ID = " << this << "\\n";
2684+
Stream << "KERNEL FUSION on " << deviceToString(MQueue->get_device()) << "\\n"
2685+
<< "FUSION LIST: {";
2686+
bool Initial = true;
2687+
for (auto *Cmd : MFusionList) {
2688+
if (!Initial) {
2689+
Stream << ",\\n";
2690+
}
2691+
Initial = false;
2692+
auto *KernelCG = static_cast<detail::CGExecKernel *>(&Cmd->getCG());
2693+
if (KernelCG->MSyclKernel && KernelCG->MSyclKernel->isCreatedFromSource()) {
2694+
Stream << "created from source";
2695+
} else {
2696+
Stream << demangleKernelName(KernelCG->getKernelName());
2697+
}
2698+
}
2699+
Stream << "}\\n";
2700+
2701+
Stream << "\"];" << std::endl;
2702+
2703+
for (const auto &Dep : MDeps) {
2704+
Stream << " \"" << this << "\" -> \"" << Dep.MDepCommand << "\""
2705+
<< " [ label = \"Access mode: "
2706+
<< accessModeToString(Dep.MDepRequirement->MAccessMode) << "\\n"
2707+
<< "MemObj: " << Dep.MDepRequirement->MSYCLMemObj << " \" ]"
2708+
<< std::endl;
2709+
}
2710+
}
2711+
25892712
} // namespace detail
25902713
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
25912714
} // namespace sycl

sycl/source/detail/scheduler/commands.hpp

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ class Command {
104104
UNMAP_MEM_OBJ,
105105
UPDATE_REQUIREMENT,
106106
EMPTY_TASK,
107-
HOST_TASK
107+
HOST_TASK,
108+
FUSION
108109
};
109110

110111
Command(CommandType Type, QueueImplPtr Queue);
@@ -571,6 +572,8 @@ pi_int32 enqueueImpKernel(
571572
std::vector<RT::PiEvent> &RawEvents, RT::PiEvent *OutEvent,
572573
const std::function<void *(Requirement *Req)> &getMemAllocationFunc);
573574

575+
class KernelFusionCommand;
576+
574577
/// The exec CG command enqueues execution of kernel or explicit memory
575578
/// operation.
576579
class ExecCGCommand : public Command {
@@ -586,6 +589,17 @@ class ExecCGCommand : public Command {
586589

587590
detail::CG &getCG() const { return *MCommandGroup; }
588591

592+
// MEmptyCmd is only employed if this command refers to host-task.
593+
// The mechanism of lookup for single EmptyCommand amongst users of
594+
// host-task-representing command is unreliable. This unreliability roots in
595+
// the cleanup process.
596+
EmptyCommand *MEmptyCmd = nullptr;
597+
598+
// MFusionCommand is employed to mark a CG command as part of a kernel fusion
599+
// and allows to refer back to the corresponding KernelFusionCommand if
600+
// necessary.
601+
KernelFusionCommand *MFusionCmd = nullptr;
602+
589603
bool producesPiEvent() const final;
590604

591605
bool supportsPostEnqueueCleanup() const final;
@@ -619,6 +633,43 @@ class UpdateHostRequirementCommand : public Command {
619633
void **MDstPtr = nullptr;
620634
};
621635

636+
/// The KernelFusionCommand is placed in the execution graph together with the
637+
/// individual kernels of the fusion list to control kernel fusion.
638+
class KernelFusionCommand : public Command {
639+
public:
640+
enum class FusionStatus { ACTIVE, CANCELLED, COMPLETE, DELETED };
641+
642+
explicit KernelFusionCommand(QueueImplPtr Queue);
643+
644+
void printDot(std::ostream &Stream) const final;
645+
void emitInstrumentationData() final;
646+
bool producesPiEvent() const final;
647+
648+
std::vector<Command *> &auxiliaryCommands();
649+
650+
void addToFusionList(ExecCGCommand *Kernel);
651+
652+
std::vector<ExecCGCommand *> &getFusionList();
653+
654+
///
655+
/// Set the status of this fusion command to \p Status. This function should
656+
/// only be called under the protection of the scheduler write-lock.
657+
void setFusionStatus(FusionStatus Status);
658+
659+
bool isActive() const { return MStatus == FusionStatus::ACTIVE; }
660+
661+
bool readyForDeletion() const { return MStatus == FusionStatus::DELETED; }
662+
663+
private:
664+
pi_int32 enqueueImp() final;
665+
666+
std::vector<ExecCGCommand *> MFusionList;
667+
668+
std::vector<Command *> MAuxiliaryCommands;
669+
670+
FusionStatus MStatus;
671+
};
672+
622673
} // namespace detail
623674
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
624675
} // namespace sycl

0 commit comments

Comments
 (0)