Skip to content

[SYCL][Fusion] Scheduler support for kernel fusion #7531

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions sycl/source/detail/fusion/fusion_wrapper_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

#include <detail/fusion/fusion_wrapper_impl.hpp>

#include <detail/scheduler/scheduler.hpp>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {
namespace detail {
Expand All @@ -20,22 +22,22 @@ std::shared_ptr<detail::queue_impl> fusion_wrapper_impl::get_queue() const {
return MQueue;
}

bool fusion_wrapper_impl::is_in_fusion_mode() const { return false; }
bool fusion_wrapper_impl::is_in_fusion_mode() const {
return MQueue->is_in_fusion_mode();
}

void fusion_wrapper_impl::start_fusion() {
throw sycl::exception(sycl::errc::feature_not_supported,
"Fusion not yet implemented");
detail::Scheduler::getInstance().startFusion(MQueue);
}

void fusion_wrapper_impl::cancel_fusion() {
throw sycl::exception(sycl::errc::feature_not_supported,
"Fusion not yet implemented");
detail::Scheduler::getInstance().cancelFusion(MQueue);
}

event fusion_wrapper_impl::complete_fusion(const property_list &PropList) {
(void)PropList;
throw sycl::exception(sycl::errc::feature_not_supported,
"Fusion not yet implemented");
auto EventImpl =
detail::Scheduler::getInstance().completeFusion(MQueue, PropList);
return detail::createSyclObjFromImpl<event>(EventImpl);
}

} // namespace detail
Expand Down
12 changes: 10 additions & 2 deletions sycl/source/detail/helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//

#include <detail/scheduler/commands.hpp>
#include <sycl/detail/helpers.hpp>

#include <detail/context_impl.hpp>
Expand All @@ -30,9 +31,16 @@ std::vector<RT::PiEvent> getOrWaitEvents(std::vector<sycl::event> DepEvents,
!SyclEventImplPtr->is_host()) {
continue;
}
// The fusion command and its event are associated with a non-host context,
// but still does not produce a PI event.
bool NoPiEvent =
SyclEventImplPtr->MCommand &&
!static_cast<Command *>(SyclEventImplPtr->MCommand)->producesPiEvent();
if (SyclEventImplPtr->is_host() ||
SyclEventImplPtr->getContextImpl() != Context) {
SyclEventImplPtr->waitInternal();
SyclEventImplPtr->getContextImpl() != Context || NoPiEvent) {
// Call wait, because the command for the event might not have been
// enqueued when kernel fusion is happening.
SyclEventImplPtr->wait(SyclEventImplPtr);
} else {
Events.push_back(SyclEventImplPtr->getHandleRef());
}
Expand Down
9 changes: 9 additions & 0 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,15 @@ class queue_impl {

bool ext_oneapi_empty() const;

/// Check whether the queue is in fusion mode.
///
/// \return true if the queue is in fusion mode, false otherwise.
bool is_in_fusion_mode() {
return detail::Scheduler::getInstance().isInFusionMode(
std::hash<typename std::shared_ptr<queue_impl>::element_type *>()(
this));
}

protected:
// template is needed for proper unit testing
template <typename HandlerType = handler>
Expand Down
123 changes: 123 additions & 0 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ static std::string commandToNodeType(Command::CommandType Type) {
return "host_acc_create_buffer_lock_node";
case Command::CommandType::EMPTY_TASK:
return "host_acc_destroy_buffer_release_node";
case Command::CommandType::FUSION:
return "kernel_fusion_placeholder_node";
default:
return "unknown_node";
}
Expand Down Expand Up @@ -196,6 +198,8 @@ static std::string commandToName(Command::CommandType Type) {
return "Host Accessor Creation/Buffer Lock";
case Command::CommandType::EMPTY_TASK:
return "Host Accessor Destruction/Buffer Lock Release";
case Command::CommandType::FUSION:
return "Kernel Fusion Placeholder";
default:
return "Unknown Action";
}
Expand Down Expand Up @@ -2586,6 +2590,125 @@ bool ExecCGCommand::readyForCleanup() const {
return MLeafCounter == 0 && MEvent->isCompleted();
return Command::readyForCleanup();
}

KernelFusionCommand::KernelFusionCommand(QueueImplPtr Queue)
: Command(Command::CommandType::FUSION, Queue),
MStatus(FusionStatus::ACTIVE) {
emitInstrumentationDataProxy();
}

std::vector<Command *> &KernelFusionCommand::auxiliaryCommands() {
return MAuxiliaryCommands;
}

void KernelFusionCommand::addToFusionList(ExecCGCommand *Kernel) {
MFusionList.push_back(Kernel);
}

std::vector<ExecCGCommand *> &KernelFusionCommand::getFusionList() {
return MFusionList;
}

bool KernelFusionCommand::producesPiEvent() const { return false; }

pi_int32 KernelFusionCommand::enqueueImp() {
waitForPreparedHostEvents();
waitForEvents(MQueue, MPreparedDepsEvents, MEvent->getHandleRef());

return PI_SUCCESS;
}

void KernelFusionCommand::setFusionStatus(FusionStatus Status) {
MStatus = Status;
}

void KernelFusionCommand::emitInstrumentationData() {
#ifdef XPTI_ENABLE_INSTRUMENTATION
if (!xptiTraceEnabled()) {
return;
}
// Create a payload with the command name and an event using this payload to
// emit a node_create
MCommandNodeType = commandToNodeType(MType);
MCommandName = commandToName(MType);

static unsigned FusionNodeCount = 0;
std::stringstream PayloadStr;
PayloadStr << "Fusion command #" << FusionNodeCount++;
xpti::payload_t Payload = xpti::payload_t(PayloadStr.str().c_str());

uint64_t CommandInstanceNo = 0;
xpti_td *CmdTraceEvent =
xptiMakeEvent(MCommandName.c_str(), &Payload, xpti::trace_graph_event,
xpti_at::active, &CommandInstanceNo);

MInstanceID = CommandInstanceNo;
if (CmdTraceEvent) {
MTraceEvent = static_cast<void *>(CmdTraceEvent);
// If we are seeing this event again, then the instance ID
// will be greater
// than 1; in this case, we must skip sending a
// notification to create a node as this node has already
// been created. We return this value so the epilog method
// can be called selectively.
// See makeTraceEventProlog.
MFirstInstance = (CommandInstanceNo == 1);
}

// This function is called in the constructor of the command. At this point
// the kernel fusion list is still empty, so we don't have a terrible lot of
// information we could attach to this node here.
if (MFirstInstance && CmdTraceEvent) {
xpti::addMetadata(CmdTraceEvent, "sycl_device",
deviceToID(MQueue->get_device()));
xpti::addMetadata(CmdTraceEvent, "sycl_device_type",
deviceToString(MQueue->get_device()));
xpti::addMetadata(CmdTraceEvent, "sycl_device_name",
getSyclObjImpl(MQueue->get_device())->getDeviceName());
}

if (MFirstInstance) {
xptiNotifySubscribers(MStreamID, xpti::trace_node_create,
detail::GSYCLGraphEvent,
static_cast<xpti_td *>(MTraceEvent), MInstanceID,
static_cast<const void *>(MCommandNodeType.c_str()));
}

#endif
}

void KernelFusionCommand::printDot(std::ostream &Stream) const {
Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\"";

Stream << "ID = " << this << "\\n";
Stream << "KERNEL FUSION on " << deviceToString(MQueue->get_device()) << "\\n"
<< "FUSION LIST: {";
bool Initial = true;
for (auto *Cmd : MFusionList) {
if (!Initial) {
Stream << ",\\n";
}
Initial = false;
auto *KernelCG = static_cast<detail::CGExecKernel *>(&Cmd->getCG());
if (KernelCG->MSyclKernel && KernelCG->MSyclKernel->isCreatedFromSource()) {
Stream << "created from source";
} else {
Stream << demangleKernelName(KernelCG->getKernelName());
}
}
Stream << "}\\n";

Stream << "\"];" << std::endl;

for (const auto &Dep : MDeps) {
Stream << " \"" << this << "\" -> \"" << Dep.MDepCommand << "\""
<< " [ label = \"Access mode: "
<< accessModeToString(Dep.MDepRequirement->MAccessMode) << "\\n"
<< "MemObj: " << Dep.MDepRequirement->MSYCLMemObj << " \" ]"
<< std::endl;
}
}

} // namespace detail
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
53 changes: 52 additions & 1 deletion sycl/source/detail/scheduler/commands.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ class Command {
UNMAP_MEM_OBJ,
UPDATE_REQUIREMENT,
EMPTY_TASK,
HOST_TASK
HOST_TASK,
FUSION
};

Command(CommandType Type, QueueImplPtr Queue);
Expand Down Expand Up @@ -571,6 +572,8 @@ pi_int32 enqueueImpKernel(
std::vector<RT::PiEvent> &RawEvents, RT::PiEvent *OutEvent,
const std::function<void *(Requirement *Req)> &getMemAllocationFunc);

class KernelFusionCommand;

/// The exec CG command enqueues execution of kernel or explicit memory
/// operation.
class ExecCGCommand : public Command {
Expand All @@ -586,6 +589,17 @@ class ExecCGCommand : public Command {

detail::CG &getCG() const { return *MCommandGroup; }

// MEmptyCmd is only employed if this command refers to host-task.
// The mechanism of lookup for single EmptyCommand amongst users of
// host-task-representing command is unreliable. This unreliability roots in
// the cleanup process.
EmptyCommand *MEmptyCmd = nullptr;

// MFusionCommand is employed to mark a CG command as part of a kernel fusion
// and allows to refer back to the corresponding KernelFusionCommand if
// necessary.
KernelFusionCommand *MFusionCmd = nullptr;

bool producesPiEvent() const final;

bool supportsPostEnqueueCleanup() const final;
Expand Down Expand Up @@ -619,6 +633,43 @@ class UpdateHostRequirementCommand : public Command {
void **MDstPtr = nullptr;
};

/// The KernelFusionCommand is placed in the execution graph together with the
/// individual kernels of the fusion list to control kernel fusion.
class KernelFusionCommand : public Command {
public:
enum class FusionStatus { ACTIVE, CANCELLED, COMPLETE, DELETED };

explicit KernelFusionCommand(QueueImplPtr Queue);

void printDot(std::ostream &Stream) const final;
void emitInstrumentationData() final;
bool producesPiEvent() const final;

std::vector<Command *> &auxiliaryCommands();

void addToFusionList(ExecCGCommand *Kernel);

std::vector<ExecCGCommand *> &getFusionList();

///
/// Set the status of this fusion command to \p Status. This function should
/// only be called under the protection of the scheduler write-lock.
void setFusionStatus(FusionStatus Status);

bool isActive() const { return MStatus == FusionStatus::ACTIVE; }

bool readyForDeletion() const { return MStatus == FusionStatus::DELETED; }

private:
pi_int32 enqueueImp() final;

std::vector<ExecCGCommand *> MFusionList;

std::vector<Command *> MAuxiliaryCommands;

FusionStatus MStatus;
};

} // namespace detail
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
Loading