Skip to content

[SYCL] Add implementation of host-interop-task and test. #1748

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 38 commits into from
Jun 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
f37a294
[SYCL] Introduce interop handle for host task
May 25, 2020
cea1bf9
[SYCL] Add interface to call host-interop-task
May 25, 2020
e04ea75
[SYCL] Add test
May 25, 2020
024fe9e
[SYCL] Fix style issue
May 25, 2020
60e0003
[SYCL] Add implementation of host-task with interop_handle argument
May 25, 2020
f8886cc
[SYCL] Enable the test
May 25, 2020
d46dde2
Merge branch 'sycl' into private/s-kanaev/ht-interop-task-iface
Jun 10, 2020
e2606df
[SYCL] Fix style issue
Jun 10, 2020
70be117
Merge branch 'sycl' into private/s-kanaev/ht-interop-task-iface
Jun 17, 2020
55ca5e6
Merge branch 'sycl' into private/s-kanaev/ht-interop-task-impl
Jun 17, 2020
9ecd809
[SYCL] Fix merge glitch
Jun 17, 2020
7330e46
Merge branch 'private/s-kanaev/ht-interop-task-impl' into private/s-k…
Jun 17, 2020
f59ba0e
[SYCL] Fix merge glitch
Jun 17, 2020
b4ae526
Merge branch 'sycl' into private/s-kanaev/ht-interop-task-iface+impl
Jun 22, 2020
3c0a531
[SYCL] Remove redundant test
Jun 22, 2020
723de56
[SYCL] Fixes in implementation of interop host task
Jun 22, 2020
3dac7cf
[SYCL] Add tests
Jun 22, 2020
9ba1468
[SYCL] Fix ABI tests
Jun 22, 2020
3ae370c
[SYCL] Uplift dev version
Jun 22, 2020
baa39f3
[SYCL] Fix style issues
Jun 22, 2020
762f31d
[SYCL] Employ detail::* type traits
Jun 22, 2020
89b373c
[SYCL] Fix style issue
Jun 22, 2020
6a51f9d
[SYCL] Fix test
Jun 23, 2020
7486f0d
[SYCL] Disable test for Level0 plugin
Jun 23, 2020
b89ba0d
[SYCL][NFC] Remove redundant includes
Jun 23, 2020
9f5c4f5
Merge branch 'sycl' into private/s-kanaev/ht-interop-task-iface
Jun 23, 2020
48eae78
[SYCL] Disable the test for cuda
Jun 23, 2020
f90bd72
[SYCL] Disable the test for cuda
Jun 23, 2020
aee07ca
Merge branch 'sycl' into private/s-kanaev/ht-interop-task-iface
Jun 23, 2020
81df3bb
[SYCL][NFC] Apply comments.
Jun 24, 2020
c91cdab
Merge branch 'sycl' into private/s-kanaev/ht-interop-task-iface
Jun 25, 2020
404208b
[SYCL] Add test
Jun 25, 2020
285cb5a
[SYCL] Add test
Jun 25, 2020
56b6fa7
[SYCL] Create an allocation on device for host-interop-task
Jun 25, 2020
ec9f04d
[SYCL] Add test
Jun 25, 2020
253cbcb
[SYCL] Fix style issues
Jun 25, 2020
7813bb6
Merge branch 'sycl' into private/s-kanaev/ht-interop-task-iface
Jun 26, 2020
e5c6cf5
[SYCL] Stylistic changes
Jun 29, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions sycl/include/CL/sycl/detail/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,9 +305,16 @@ class CGInteropTask : public CG {
class CGHostTask : public CG {
public:
std::unique_ptr<HostTask> MHostTask;
// queue for host-interop task
shared_ptr_class<detail::queue_impl> MQueue;
// context for host-interop task
shared_ptr_class<detail::context_impl> MContext;
vector_class<ArgDesc> MArgs;

CGHostTask(std::unique_ptr<HostTask> HostTask, vector_class<ArgDesc> Args,
CGHostTask(std::unique_ptr<HostTask> HostTask,
std::shared_ptr<detail::queue_impl> Queue,
std::shared_ptr<detail::context_impl> Context,
vector_class<ArgDesc> Args,
std::vector<std::vector<char>> ArgsStorage,
std::vector<detail::AccessorImplPtr> AccStorage,
std::vector<std::shared_ptr<const void>> SharedPtrStorage,
Expand All @@ -317,7 +324,8 @@ class CGHostTask : public CG {
: CG(Type, std::move(ArgsStorage), std::move(AccStorage),
std::move(SharedPtrStorage), std::move(Requirements),
std::move(Events), std::move(loc)),
MHostTask(std::move(HostTask)), MArgs(std::move(Args)) {}
MHostTask(std::move(HostTask)), MQueue(Queue), MContext(Context),
MArgs(std::move(Args)) {}
};

class CGBarrier : public CG {
Expand Down
6 changes: 6 additions & 0 deletions sycl/include/CL/sycl/detail/cg_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <CL/sycl/detail/kernel_desc.hpp>
#include <CL/sycl/group.hpp>
#include <CL/sycl/id.hpp>
#include <CL/sycl/interop_handle.hpp>
#include <CL/sycl/interop_handler.hpp>
#include <CL/sycl/kernel.hpp>
#include <CL/sycl/nd_item.hpp>
Expand Down Expand Up @@ -143,12 +144,17 @@ class InteropTask {

/// Type-erased holder for the callable submitted as a host task.
///
/// Exactly one of the two stored callables is populated, depending on which
/// constructor was used:
///   - a plain task with signature `void()`, or
///   - an interop task with signature `void(interop_handle)`.
/// Callers are expected to check isInteropTask() and use the matching call()
/// overload; invoking the overload whose callable is empty goes through an
/// empty std::function (which throws std::bad_function_call).
class HostTask {
  // Populated by the default and `void()` constructors; empty for interop
  // tasks.
  std::function<void()> MHostTask;
  // Populated only by the `void(interop_handle)` constructor.
  std::function<void(interop_handle)> MInteropTask;

public:
  /// Creates a plain host task whose body is a no-op.
  HostTask() : MHostTask([]() {}) {}

  /// Takes ownership of a plain host task.
  /// \param Func callable to store; it is moved from, not copied.
  HostTask(std::function<void()> &&Func) : MHostTask(std::move(Func)) {}

  /// Takes ownership of an interop host task.
  /// \param Func callable to store; it is moved from, not copied.
  HostTask(std::function<void(interop_handle)> &&Func)
      : MInteropTask(std::move(Func)) {}

  /// \return true if this object stores an interop task, i.e. it was built
  /// from a `void(interop_handle)` callable.
  bool isInteropTask() const { return !!MInteropTask; }

  /// Invokes the stored plain task.
  void call() { MHostTask(); }

  /// Invokes the stored interop task.
  /// \param handle interop handle forwarded to the user callable.
  void call(interop_handle handle) { MInteropTask(handle); }
};

// Class which stores specific lambda object.
Expand Down
1 change: 1 addition & 0 deletions sycl/include/CL/sycl/detail/sycl_mem_obj_i.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ class SYCLMemObjI {
// which is unavailable.
shared_ptr_class<MemObjRecord> MRecord;
friend class Scheduler;
friend class ExecCGCommand;
};

} // namespace detail
Expand Down
19 changes: 17 additions & 2 deletions sycl/include/CL/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <CL/sycl/detail/os_util.hpp>
#include <CL/sycl/event.hpp>
#include <CL/sycl/id.hpp>
#include <CL/sycl/interop_handle.hpp>
#include <CL/sycl/item.hpp>
#include <CL/sycl/kernel.hpp>
#include <CL/sycl/nd_item.hpp>
Expand Down Expand Up @@ -856,8 +857,22 @@ class __SYCL_EXPORT handler {
}

template <typename FuncT>
typename std::enable_if<detail::check_fn_signature<
typename std::remove_reference<FuncT>::type, void()>::value>::type
detail::enable_if_t<detail::check_fn_signature<
detail::remove_reference_t<FuncT>, void()>::value>
codeplay_host_task(FuncT Func) {
throwIfActionIsCreated();

MNDRDesc.set(range<1>(1));
MArgs = std::move(MAssociatedAccesors);

MHostTask.reset(new detail::HostTask(std::move(Func)));

MCGType = detail::CG::CODEPLAY_HOST_TASK;
}

template <typename FuncT>
detail::enable_if_t<detail::check_fn_signature<
detail::remove_reference_t<FuncT>, void(interop_handle)>::value>
codeplay_host_task(FuncT Func) {
throwIfActionIsCreated();

Expand Down
26 changes: 21 additions & 5 deletions sycl/include/CL/sycl/interop_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,13 @@ class interop_handle {
template <backend BackendName = backend::opencl>
auto get_native_queue() const noexcept ->
typename interop<BackendName, queue>::type {
#ifndef __SYCL_DEVICE_ONLY__
// __SYCL_DEVICE_ONLY__ is not defined: reinterpret the value returned by
// getNativeQueue() as the interop type selected by BackendName.
return reinterpret_cast<typename interop<BackendName, queue>::type>(
getNativeQueue());
#else
// __SYCL_DEVICE_ONLY__ is defined: this function is not expected to ever be
// called on the device side, so a null placeholder is returned instead of
// referencing getNativeQueue().
return nullptr;
#endif
}

/// Returns an underlying OpenCL device associated with the SYCL queue used
Expand All @@ -94,8 +99,13 @@ class interop_handle {
template <backend BackendName = backend::opencl>
auto get_native_device() const noexcept ->
typename interop<BackendName, device>::type {
#ifndef __SYCL_DEVICE_ONLY__
// __SYCL_DEVICE_ONLY__ is not defined: reinterpret the value returned by
// getNativeDevice() as the interop type selected by BackendName.
return reinterpret_cast<typename interop<BackendName, device>::type>(
getNativeDevice());
#else
// __SYCL_DEVICE_ONLY__ is defined: this function is not expected to ever be
// called on the device side, so a null placeholder is returned instead of
// referencing getNativeDevice().
return nullptr;
#endif
}

/// Returns an underlying OpenCL context associated with the SYCL queue used
Expand All @@ -104,14 +114,20 @@ class interop_handle {
template <backend BackendName = backend::opencl>
auto get_native_context() const noexcept ->
typename interop<BackendName, context>::type {
#ifndef __SYCL_DEVICE_ONLY__
// __SYCL_DEVICE_ONLY__ is not defined: reinterpret the value returned by
// getNativeContext() as the interop type selected by BackendName.
return reinterpret_cast<typename interop<BackendName, context>::type>(
getNativeContext());
#else
// __SYCL_DEVICE_ONLY__ is defined: this function is not expected to ever be
// called on the device side, so a null placeholder is returned instead of
// referencing getNativeContext().
return nullptr;
#endif
}

private:
friend class detail::ExecCGCommand;
friend class detail::DispatchHostTask;
using ReqToMem = std::pair<detail::Requirement *, pi_mem>;

public:
// TODO set c-tor private
interop_handle(std::vector<ReqToMem> MemObjs,
const std::shared_ptr<detail::queue_impl> &Queue,
Expand All @@ -131,10 +147,10 @@ class interop_handle {
getNativeMem(Req));
}

pi_native_handle getNativeMem(detail::Requirement *Req) const;
pi_native_handle getNativeQueue() const;
pi_native_handle getNativeDevice() const;
pi_native_handle getNativeContext() const;
__SYCL_EXPORT pi_native_handle getNativeMem(detail::Requirement *Req) const;
__SYCL_EXPORT pi_native_handle getNativeQueue() const;
__SYCL_EXPORT pi_native_handle getNativeDevice() const;
__SYCL_EXPORT pi_native_handle getNativeContext() const;

std::shared_ptr<detail::queue_impl> MQueue;
std::shared_ptr<detail::device_impl> MDevice;
Expand Down
51 changes: 47 additions & 4 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@

#include <detail/error_handling/error_handling.hpp>

#include "CL/sycl/access/access.hpp"
#include <CL/sycl/access/access.hpp>
#include <CL/sycl/backend_types.hpp>
#include <CL/sycl/detail/cg_types.hpp>
#include <CL/sycl/detail/cl.h>
#include <CL/sycl/detail/kernel_desc.hpp>
#include <CL/sycl/detail/memory_manager.hpp>
Expand Down Expand Up @@ -159,6 +160,7 @@ getPiEvents(const std::vector<EventImplPtr> &EventImpls) {

class DispatchHostTask {
ExecCGCommand *MThisCmd;
std::vector<interop_handle::ReqToMem> MReqToMem;

void waitForEvents() const {
std::map<const detail::plugin *, std::vector<EventImplPtr>>
Expand Down Expand Up @@ -187,7 +189,9 @@ class DispatchHostTask {
}

public:
DispatchHostTask(ExecCGCommand *ThisCmd) : MThisCmd{ThisCmd} {}
DispatchHostTask(ExecCGCommand *ThisCmd,
std::vector<interop_handle::ReqToMem> ReqToMem)
: MThisCmd{ThisCmd}, MReqToMem(std::move(ReqToMem)) {}

void operator()() const {
waitForEvents();
Expand All @@ -197,7 +201,15 @@ class DispatchHostTask {
CGHostTask &HostTask = static_cast<CGHostTask &>(MThisCmd->getCG());

// we're ready to call the user-defined lambda now
HostTask.MHostTask->call();
if (HostTask.MHostTask->isInteropTask()) {
interop_handle IH{MReqToMem, HostTask.MQueue,
getSyclObjImpl(HostTask.MQueue->get_device()),
HostTask.MQueue->getContextImplPtr()};

HostTask.MHostTask->call(IH);
} else
HostTask.MHostTask->call();

HostTask.MHostTask.reset();

// unblock user empty command here
Expand Down Expand Up @@ -1943,7 +1955,38 @@ cl_int ExecCGCommand::enqueueImp() {
}
}

MQueue->getThreadPool().submit<DispatchHostTask>(DispatchHostTask(this));
std::vector<interop_handle::ReqToMem> ReqToMem;

if (HostTask->MHostTask->isInteropTask()) {
// Extract the Mem Objects for all Requirements, to ensure they are
// available if a user asks for them inside the interop task scope
const std::vector<Requirement *> &HandlerReq = HostTask->MRequirements;
auto ReqToMemConv = [&ReqToMem, HostTask](Requirement *Req) {
const std::vector<AllocaCommandBase *> &AllocaCmds =
Req->MSYCLMemObj->MRecord->MAllocaCommands;

for (AllocaCommandBase *AllocaCmd : AllocaCmds)
if (HostTask->MQueue == AllocaCmd->getQueue()) {
auto MemArg =
reinterpret_cast<pi_mem>(AllocaCmd->getMemAllocation());
ReqToMem.emplace_back(std::make_pair(Req, MemArg));

return;
}

assert(false &&
"Can't get memory object due to no allocation available");

throw runtime_error(
"Can't get memory object due to no allocation available",
PI_INVALID_MEM_OBJECT);
};
std::for_each(std::begin(HandlerReq), std::end(HandlerReq), ReqToMemConv);
std::sort(std::begin(ReqToMem), std::end(ReqToMem));
}

MQueue->getThreadPool().submit<DispatchHostTask>(
DispatchHostTask(this, std::move(ReqToMem)));

MShouldCompleteEventIfPossible = false;

Expand Down
61 changes: 51 additions & 10 deletions sycl/source/detail/scheduler/graph_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,16 @@ Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, const std::vector<T *> &Reqs,
return EmptyCmd;
}

static bool isInteropHostTask(const std::unique_ptr<ExecCGCommand> &Cmd) {
if (Cmd->getCG().getType() != CG::CGTYPE::CODEPLAY_HOST_TASK)
return false;

const detail::CGHostTask &HT =
static_cast<detail::CGHostTask &>(Cmd->getCG());

return HT.MHostTask->isInteropTask();
}

Command *
Scheduler::GraphBuilder::addCG(std::unique_ptr<detail::CG> CommandGroup,
QueueImplPtr Queue) {
Expand All @@ -695,13 +705,29 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr<detail::CG> CommandGroup,
printGraphAsDot("before_addCG");

for (Requirement *Req : Reqs) {
MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req);
markModifiedIfWrite(Record, Req);
MemObjRecord *Record = nullptr;
AllocaCommandBase *AllocaCmd = nullptr;

bool isSameCtx = false;

{
const QueueImplPtr &QueueForAlloca =
isInteropHostTask(NewCmd)
? static_cast<detail::CGHostTask &>(NewCmd->getCG()).MQueue
: Queue;

Record = getOrInsertMemObjRecord(QueueForAlloca, Req);
markModifiedIfWrite(Record, Req);

AllocaCmd = getOrCreateAllocaForReq(Record, Req, QueueForAlloca);

isSameCtx =
sameCtx(QueueForAlloca->getContextImplPtr(), Record->MCurContext);
}

AllocaCommandBase *AllocaCmd = getOrCreateAllocaForReq(Record, Req, Queue);
// If there is alloca command we need to check if the latest memory is in
// required context.
if (sameCtx(Queue->getContextImplPtr(), Record->MCurContext)) {
if (isSameCtx) {
// If the memory is already in the required host context, check if the
// required access mode is valid, remap if not.
if (Record->MCurContext->is_host() &&
Expand All @@ -710,10 +736,24 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr<detail::CG> CommandGroup,
} else {
// Cannot directly copy memory from OpenCL device to OpenCL device -
// create two copies: device->host and host->device.
if (!Queue->is_host() && !Record->MCurContext->is_host())
bool NeedMemMoveToHost = false;
auto MemMoveTargetQueue = Queue;

if (isInteropHostTask(NewCmd)) {
const detail::CGHostTask &HT =
static_cast<detail::CGHostTask &>(NewCmd->getCG());

if (HT.MQueue->getContextImplPtr() != Record->MCurContext) {
NeedMemMoveToHost = true;
MemMoveTargetQueue = HT.MQueue;
}
} else if (!Queue->is_host() && !Record->MCurContext->is_host())
NeedMemMoveToHost = true;

if (NeedMemMoveToHost)
insertMemoryMove(Record, Req,
Scheduler::getInstance().getDefaultHostQueue());
insertMemoryMove(Record, Req, Queue);
insertMemoryMove(Record, Req, MemMoveTargetQueue);
}
std::set<Command *> Deps =
findDepsForReq(Record, Req, Queue->getContextImplPtr());
Expand Down Expand Up @@ -927,10 +967,11 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd,
{
std::unique_ptr<detail::HostTask> HT(new detail::HostTask);
std::unique_ptr<detail::CG> ConnectCG(new detail::CGHostTask(
std::move(HT), /* Args = */ {}, /* ArgsStorage = */ {},
/* AccStorage = */ {}, /* SharedPtrStorage = */ {},
/* Requirements = */ {}, /* DepEvents = */ {DepEvent},
CG::CODEPLAY_HOST_TASK, /* Payload */ {}));
std::move(HT), /* Queue = */ {}, /* Context = */ {}, /* Args = */ {},
/* ArgsStorage = */ {}, /* AccStorage = */ {},
/* SharedPtrStorage = */ {}, /* Requirements = */ {},
/* DepEvents = */ {DepEvent}, CG::CODEPLAY_HOST_TASK,
/* Payload */ {}));
ConnectCmd = new ExecCGCommand(
std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue());
}
Expand Down
7 changes: 4 additions & 3 deletions sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,10 @@ event handler::finalize() {
break;
case detail::CG::CODEPLAY_HOST_TASK:
CommandGroup.reset(new detail::CGHostTask(
std::move(MHostTask), std::move(MArgs), std::move(MArgsStorage),
std::move(MAccStorage), std::move(MSharedPtrStorage),
std::move(MRequirements), std::move(MEvents), MCGType, MCodeLoc));
std::move(MHostTask), MQueue, MQueue->getContextImplPtr(),
std::move(MArgs), std::move(MArgsStorage), std::move(MAccStorage),
std::move(MSharedPtrStorage), std::move(MRequirements),
std::move(MEvents), MCGType, MCodeLoc));
break;
case detail::CG::BARRIER:
case detail::CG::BARRIER_WAITLIST:
Expand Down
4 changes: 4 additions & 0 deletions sycl/test/abi/sycl_symbols_linux.dump
Original file line number Diff line number Diff line change
Expand Up @@ -3320,6 +3320,10 @@ _ZNK2cl4sycl13host_selectorclERKNS0_6deviceE
_ZNK2cl4sycl14exception_list3endEv
_ZNK2cl4sycl14exception_list4sizeEv
_ZNK2cl4sycl14exception_list5beginEv
_ZNK2cl4sycl14interop_handle12getNativeMemEPNS0_6detail16AccessorImplHostE
_ZNK2cl4sycl14interop_handle14getNativeQueueEv
_ZNK2cl4sycl14interop_handle15getNativeDeviceEv
_ZNK2cl4sycl14interop_handle16getNativeContextEv
_ZNK2cl4sycl15device_selector13select_deviceEv
_ZNK2cl4sycl15interop_handler12GetNativeMemEPNS0_6detail16AccessorImplHostE
_ZNK2cl4sycl15interop_handler14GetNativeQueueEv
Expand Down
Loading