@@ -127,11 +127,14 @@ static unsigned long long getQueueID(const std::shared_ptr<queue_impl> &Queue) {
127
127
}
128
128
#endif
129
129
130
- static context_impl *getContext (const QueueImplPtr & Queue) {
130
+ static context_impl *getContext (queue_impl * Queue) {
131
131
if (Queue)
132
132
return &Queue->getContextImpl ();
133
133
return nullptr ;
134
134
}
135
+ static context_impl *getContext (const std::shared_ptr<queue_impl> &Queue) {
136
+ return getContext (Queue.get ());
137
+ }
135
138
136
139
#ifdef __SYCL_ENABLE_GNU_DEMANGLING
137
140
struct DemangleHandle {
@@ -503,7 +506,7 @@ void Command::waitForPreparedHostEvents() const {
503
506
HostEvent->waitInternal ();
504
507
}
505
508
506
- void Command::waitForEvents (QueueImplPtr Queue,
509
+ void Command::waitForEvents (queue_impl * Queue,
507
510
std::vector<EventImplPtr> &EventImpls,
508
511
ur_event_handle_t &Event) {
509
512
#ifndef NDEBUG
@@ -559,12 +562,12 @@ void Command::waitForEvents(QueueImplPtr Queue,
559
562
/// references to event_impl class members because Command
560
563
/// should not outlive the event connected to it.
561
564
Command::Command (
562
- CommandType Type, QueueImplPtr Queue,
565
+ CommandType Type, queue_impl * Queue,
563
566
ur_exp_command_buffer_handle_t CommandBuffer,
564
567
const std::vector<ur_exp_command_buffer_sync_point_t > &SyncPoints)
565
- : MQueue(std::move( Queue) ),
566
- MEvent(MQueue ? detail::event_impl::create_device_event(*MQueue )
567
- : detail::event_impl::create_incomplete_host_event()),
568
+ : MQueue(Queue ? Queue-> shared_from_this () : nullptr ),
569
+ MEvent(Queue ? detail::event_impl::create_device_event(*Queue )
570
+ : detail::event_impl::create_incomplete_host_event()),
568
571
MPreparedDepsEvents(MEvent->getPreparedDepsEvents ()),
569
572
MPreparedHostDepsEvents(MEvent->getPreparedHostDepsEvents ()), MType(Type),
570
573
MCommandBuffer(CommandBuffer), MSyncPointDeps(SyncPoints) {
@@ -1027,7 +1030,7 @@ void Command::copySubmissionCodeLocation() {
1027
1030
#endif
1028
1031
}
1029
1032
1030
- AllocaCommandBase::AllocaCommandBase (CommandType Type, QueueImplPtr Queue,
1033
+ AllocaCommandBase::AllocaCommandBase (CommandType Type, queue_impl * Queue,
1031
1034
Requirement Req,
1032
1035
AllocaCommandBase *LinkedAllocaCmd,
1033
1036
bool IsConst)
@@ -1070,10 +1073,10 @@ bool AllocaCommandBase::supportsPostEnqueueCleanup() const { return false; }
1070
1073
1071
1074
/// Cleanup-readiness query for allocation commands.
///
/// \return always false; this implementation never reports the command as
///         ready for cleanup.
bool AllocaCommandBase::readyForCleanup() const {
  return false;
}
1072
1075
1073
- AllocaCommand::AllocaCommand (QueueImplPtr Queue, Requirement Req,
1076
+ AllocaCommand::AllocaCommand (queue_impl * Queue, Requirement Req,
1074
1077
bool InitFromUserData,
1075
1078
AllocaCommandBase *LinkedAllocaCmd, bool IsConst)
1076
- : AllocaCommandBase(CommandType::ALLOCA, std::move( Queue) , std::move(Req),
1079
+ : AllocaCommandBase(CommandType::ALLOCA, Queue, std::move(Req),
1077
1080
LinkedAllocaCmd, IsConst),
1078
1081
MInitFromUserData(InitFromUserData) {
1079
1082
// Node event must be created before the dependent edge is added to this
@@ -1108,7 +1111,7 @@ ur_result_t AllocaCommand::enqueueImp() {
1108
1111
1109
1112
if (!MQueue) {
1110
1113
// Do not need to make allocation if we have a linked device allocation
1111
- Command::waitForEvents (MQueue, EventImpls, UREvent);
1114
+ Command::waitForEvents (MQueue. get () , EventImpls, UREvent);
1112
1115
MEvent->setHandle (UREvent);
1113
1116
1114
1117
return UR_RESULT_SUCCESS;
@@ -1148,12 +1151,11 @@ void AllocaCommand::printDot(std::ostream &Stream) const {
1148
1151
}
1149
1152
}
1150
1153
1151
- AllocaSubBufCommand::AllocaSubBufCommand (QueueImplPtr Queue, Requirement Req,
1154
+ AllocaSubBufCommand::AllocaSubBufCommand (queue_impl * Queue, Requirement Req,
1152
1155
AllocaCommandBase *ParentAlloca,
1153
1156
std::vector<Command *> &ToEnqueue,
1154
1157
std::vector<Command *> &ToCleanUp)
1155
- : AllocaCommandBase(CommandType::ALLOCA_SUB_BUF, std::move(Queue),
1156
- std::move(Req),
1158
+ : AllocaCommandBase(CommandType::ALLOCA_SUB_BUF, Queue, std::move(Req),
1157
1159
/* LinkedAllocaCmd*/ nullptr, /* IsConst*/ false),
1158
1160
MParentAlloca(ParentAlloca) {
1159
1161
// Node event must be created before the dependent edge
@@ -1234,8 +1236,8 @@ void AllocaSubBufCommand::printDot(std::ostream &Stream) const {
1234
1236
}
1235
1237
}
1236
1238
1237
- ReleaseCommand::ReleaseCommand (QueueImplPtr Queue, AllocaCommandBase *AllocaCmd)
1238
- : Command(CommandType::RELEASE, std::move( Queue) ), MAllocaCmd(AllocaCmd) {
1239
+ ReleaseCommand::ReleaseCommand (queue_impl * Queue, AllocaCommandBase *AllocaCmd)
1240
+ : Command(CommandType::RELEASE, Queue), MAllocaCmd(AllocaCmd) {
1239
1241
emitInstrumentationDataProxy ();
1240
1242
}
1241
1243
@@ -1288,9 +1290,9 @@ ur_result_t ReleaseCommand::enqueueImp() {
1288
1290
}
1289
1291
1290
1292
if (NeedUnmap) {
1291
- const QueueImplPtr & Queue = CurAllocaIsHost
1292
- ? MAllocaCmd->MLinkedAllocaCmd ->getQueue ()
1293
- : MAllocaCmd->getQueue ();
1293
+ queue_impl * Queue = CurAllocaIsHost
1294
+ ? MAllocaCmd->MLinkedAllocaCmd ->getQueue ()
1295
+ : MAllocaCmd->getQueue ();
1294
1296
1295
1297
assert (Queue);
1296
1298
@@ -1321,7 +1323,7 @@ ur_result_t ReleaseCommand::enqueueImp() {
1321
1323
}
1322
1324
ur_event_handle_t UREvent = nullptr ;
1323
1325
if (SkipRelease)
1324
- Command::waitForEvents (MQueue, EventImpls, UREvent);
1326
+ Command::waitForEvents (MQueue. get () , EventImpls, UREvent);
1325
1327
else {
1326
1328
if (auto Result = callMemOpHelper (
1327
1329
MemoryManager::release, getContext (MQueue),
@@ -1359,11 +1361,10 @@ bool ReleaseCommand::supportsPostEnqueueCleanup() const { return false; }
1359
1361
/// Cleanup-readiness query for release commands.
///
/// \return always false; this implementation never reports the command as
///         ready for cleanup.
bool ReleaseCommand::readyForCleanup() const {
  return false;
}
1360
1362
1361
1363
/// Creates a MAP_MEM_OBJ command.
///
/// \param SrcAllocaCmd allocation command stored in MSrcAllocaCmd.
/// \param Req requirement taken by value and moved into MSrcReq.
/// \param DstPtr pointer-to-pointer stored in MDstPtr; not dereferenced here.
/// \param Queue non-owning queue pointer, passed through unchanged to the
///        Command base constructor.
/// \param MapMode access mode stored in MMapMode.
MapMemObject::MapMemObject(AllocaCommandBase *SrcAllocaCmd, Requirement Req,
                           void **DstPtr, queue_impl *Queue,
                           access::mode MapMode)
    : Command(CommandType::MAP_MEM_OBJ, Queue),
      MSrcAllocaCmd(SrcAllocaCmd),
      MSrcReq(std::move(Req)),
      MDstPtr(DstPtr),
      MMapMode(MapMode) {
  // Instrumentation is emitted once the members above are initialized.
  emitInstrumentationDataProxy();
}
1369
1370
@@ -1423,9 +1424,9 @@ void MapMemObject::printDot(std::ostream &Stream) const {
1423
1424
}
1424
1425
1425
1426
/// Creates an UNMAP_MEM_OBJ command.
///
/// \param DstAllocaCmd allocation command stored in MDstAllocaCmd.
/// \param Req requirement taken by value and moved into MDstReq.
/// \param SrcPtr pointer-to-pointer stored in MSrcPtr; not dereferenced here.
/// \param Queue non-owning queue pointer, passed through unchanged to the
///        Command base constructor.
UnMapMemObject::UnMapMemObject(AllocaCommandBase *DstAllocaCmd, Requirement Req,
                               void **SrcPtr, queue_impl *Queue)
    : Command(CommandType::UNMAP_MEM_OBJ, Queue),
      MDstAllocaCmd(DstAllocaCmd),
      MDstReq(std::move(Req)),
      MSrcPtr(SrcPtr) {
  // Instrumentation is emitted once the members above are initialized.
  emitInstrumentationDataProxy();
}
1431
1432
@@ -1509,11 +1510,11 @@ MemCpyCommand::MemCpyCommand(Requirement SrcReq,
1509
1510
AllocaCommandBase *SrcAllocaCmd,
1510
1511
Requirement DstReq,
1511
1512
AllocaCommandBase *DstAllocaCmd,
1512
- QueueImplPtr SrcQueue, QueueImplPtr DstQueue)
1513
- : Command(CommandType::COPY_MEMORY, std::move( DstQueue) ),
1514
- MSrcQueue(SrcQueue), MSrcReq(std::move(SrcReq) ),
1515
- MSrcAllocaCmd(SrcAllocaCmd), MDstReq( std::move(DstReq) ),
1516
- MDstAllocaCmd(DstAllocaCmd) {
1513
+ queue_impl * SrcQueue, queue_impl * DstQueue)
1514
+ : Command(CommandType::COPY_MEMORY, DstQueue),
1515
+ MSrcQueue(SrcQueue ? SrcQueue-> shared_from_this () : nullptr ),
1516
+ MSrcReq( std::move(SrcReq)), MSrcAllocaCmd(SrcAllocaCmd ),
1517
+ MDstReq(std::move(DstReq)), MDstAllocaCmd(DstAllocaCmd) {
1517
1518
if (MSrcQueue) {
1518
1519
MEvent->setContextImpl (MSrcQueue->getContextImplPtr ());
1519
1520
}
@@ -1645,7 +1646,7 @@ ur_result_t UpdateHostRequirementCommand::enqueueImp() {
1645
1646
waitForPreparedHostEvents ();
1646
1647
std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
1647
1648
ur_event_handle_t UREvent = nullptr ;
1648
- Command::waitForEvents (MQueue, EventImpls, UREvent);
1649
+ Command::waitForEvents (MQueue. get () , EventImpls, UREvent);
1649
1650
MEvent->setHandle (UREvent);
1650
1651
1651
1652
assert (MSrcAllocaCmd && " Expected valid alloca command" );
@@ -1682,11 +1683,11 @@ void UpdateHostRequirementCommand::printDot(std::ostream &Stream) const {
1682
1683
MemCpyCommandHost::MemCpyCommandHost (Requirement SrcReq,
1683
1684
AllocaCommandBase *SrcAllocaCmd,
1684
1685
Requirement DstReq, void **DstPtr,
1685
- QueueImplPtr SrcQueue,
1686
- QueueImplPtr DstQueue)
1687
- : Command(CommandType::COPY_MEMORY, std::move(DstQueue) ),
1688
- MSrcQueue(SrcQueue), MSrcReq(std::move(SrcReq)),
1689
- MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(DstReq)), MDstPtr(DstPtr) {
1686
+ queue_impl * SrcQueue, queue_impl *DstQueue)
1687
+ : Command(CommandType::COPY_MEMORY, DstQueue),
1688
+ MSrcQueue(SrcQueue ? SrcQueue-> shared_from_this () : nullptr ),
1689
+ MSrcReq(std::move(SrcReq)), MSrcAllocaCmd(SrcAllocaCmd ),
1690
+ MDstReq(std::move(DstReq)), MDstPtr(DstPtr) {
1690
1691
if (MSrcQueue) {
1691
1692
MEvent->setContextImpl (MSrcQueue->getContextImplPtr ());
1692
1693
}
@@ -1728,7 +1729,7 @@ ContextImplPtr MemCpyCommandHost::getWorkerContext() const {
1728
1729
}
1729
1730
1730
1731
ur_result_t MemCpyCommandHost::enqueueImp () {
1731
- const QueueImplPtr & Queue = MWorkerQueue;
1732
+ queue_impl * Queue = MWorkerQueue. get () ;
1732
1733
waitForPreparedHostEvents ();
1733
1734
std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
1734
1735
std::vector<ur_event_handle_t > RawEvents = getUrEvents (EventImpls);
@@ -1767,7 +1768,7 @@ EmptyCommand::EmptyCommand() : Command(CommandType::EMPTY_TASK, nullptr) {
1767
1768
/// Enqueues the empty task.
///
/// Waits for the prepared host events, hands the prepared dependency events
/// to waitForEvents(), and records the native event handle it produces on
/// MEvent.
///
/// \return UR_RESULT_SUCCESS; this body has no other return path.
ur_result_t EmptyCommand::enqueueImp() {
  // Host-side dependencies are handled before anything else.
  waitForPreparedHostEvents();

  // waitForEvents() receives the handle by reference, so it may replace the
  // null value set here.
  ur_event_handle_t NativeEvent = nullptr;
  waitForEvents(MQueue.get(), MPreparedDepsEvents, NativeEvent);
  MEvent->setHandle(NativeEvent);

  return UR_RESULT_SUCCESS;
}
@@ -1851,9 +1852,9 @@ void MemCpyCommandHost::printDot(std::ostream &Stream) const {
1851
1852
}
1852
1853
1853
1854
UpdateHostRequirementCommand::UpdateHostRequirementCommand (
1854
- QueueImplPtr Queue, Requirement Req, AllocaCommandBase *SrcAllocaCmd,
1855
+ queue_impl * Queue, Requirement Req, AllocaCommandBase *SrcAllocaCmd,
1855
1856
void **DstPtr)
1856
- : Command(CommandType::UPDATE_REQUIREMENT, std::move( Queue) ),
1857
+ : Command(CommandType::UPDATE_REQUIREMENT, Queue),
1857
1858
MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(Req)), MDstPtr(DstPtr) {
1858
1859
1859
1860
emitInstrumentationDataProxy ();
@@ -1949,11 +1950,10 @@ static std::string_view cgTypeToString(detail::CGType Type) {
1949
1950
}
1950
1951
1951
1952
ExecCGCommand::ExecCGCommand (
1952
- std::unique_ptr<detail::CG> CommandGroup, QueueImplPtr Queue,
1953
+ std::unique_ptr<detail::CG> CommandGroup, queue_impl * Queue,
1953
1954
bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer,
1954
1955
const std::vector<ur_exp_command_buffer_sync_point_t > &Dependencies)
1955
- : Command(CommandType::RUN_CG, std::move(Queue), CommandBuffer,
1956
- Dependencies),
1956
+ : Command(CommandType::RUN_CG, Queue, CommandBuffer, Dependencies),
1957
1957
MEventNeeded(EventNeeded), MCommandGroup(std::move(CommandGroup)) {
1958
1958
if (MCommandGroup->getType () == detail::CGType::CodeplayHostTask) {
1959
1959
MEvent->setSubmittedQueue (
@@ -2770,20 +2770,18 @@ void enqueueImpKernel(
2770
2770
}
2771
2771
}
2772
2772
2773
- ur_result_t enqueueReadWriteHostPipe (const QueueImplPtr &Queue,
2773
+ ur_result_t enqueueReadWriteHostPipe (queue_impl &Queue,
2774
2774
const std::string &PipeName, bool blocking,
2775
2775
void *ptr, size_t size,
2776
2776
std::vector<ur_event_handle_t > &RawEvents,
2777
2777
detail::event_impl *OutEventImpl,
2778
2778
bool read) {
2779
- assert (Queue &&
2780
- " ReadWrite host pipe submissions should have an associated queue" );
2781
2779
detail::HostPipeMapEntry *hostPipeEntry =
2782
2780
ProgramManager::getInstance ().getHostPipeEntry (PipeName);
2783
2781
2784
2782
ur_program_handle_t Program = nullptr ;
2785
- device Device = Queue-> get_device ();
2786
- ContextImplPtr ContextImpl = Queue-> getContextImplPtr ();
2783
+ device Device = Queue. get_device ();
2784
+ ContextImplPtr ContextImpl = Queue. getContextImplPtr ();
2787
2785
std::optional<ur_program_handle_t > CachedProgram =
2788
2786
ContextImpl->getProgramForHostPipe (Device, hostPipeEntry);
2789
2787
if (CachedProgram)
@@ -2792,17 +2790,16 @@ ur_result_t enqueueReadWriteHostPipe(const QueueImplPtr &Queue,
2792
2790
// If there was no cached program, build one.
2793
2791
device_image_plain devImgPlain =
2794
2792
ProgramManager::getInstance ().getDeviceImageFromBinaryImage (
2795
- hostPipeEntry->getDevBinImage (), Queue->get_context (),
2796
- Queue->get_device ());
2793
+ hostPipeEntry->getDevBinImage (), Queue.get_context (), Device);
2797
2794
device_image_plain BuiltImage = ProgramManager::getInstance ().build (
2798
2795
std::move (devImgPlain), {std::move (Device)}, {});
2799
2796
Program = getSyclObjImpl (BuiltImage)->get_ur_program_ref ();
2800
2797
}
2801
2798
assert (Program && " Program for this hostpipe is not compiled." );
2802
2799
2803
- const AdapterPtr &Adapter = Queue-> getAdapter ();
2800
+ const AdapterPtr &Adapter = Queue. getAdapter ();
2804
2801
2805
- ur_queue_handle_t ur_q = Queue-> getHandleRef ();
2802
+ ur_queue_handle_t ur_q = Queue. getHandleRef ();
2806
2803
ur_result_t Error;
2807
2804
2808
2805
ur_event_handle_t UREvent = nullptr ;
@@ -3660,7 +3657,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
3660
3657
if (!EventImpl) {
3661
3658
EventImpl = MEvent.get ();
3662
3659
}
3663
- return enqueueReadWriteHostPipe (MQueue, pipeName, blocking, hostPtr,
3660
+ return enqueueReadWriteHostPipe (* MQueue, pipeName, blocking, hostPtr,
3664
3661
typeSize, RawEvents, EventImpl, read);
3665
3662
}
3666
3663
case CGType::ExecCommandBuffer: {
@@ -3795,7 +3792,7 @@ bool ExecCGCommand::readyForCleanup() const {
3795
3792
}
3796
3793
3797
3794
UpdateCommandBufferCommand::UpdateCommandBufferCommand (
3798
- QueueImplPtr Queue,
3795
+ queue_impl * Queue,
3799
3796
ext::oneapi::experimental::detail::exec_graph_impl *Graph,
3800
3797
std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
3801
3798
Nodes)
@@ -3806,7 +3803,7 @@ ur_result_t UpdateCommandBufferCommand::enqueueImp() {
3806
3803
waitForPreparedHostEvents ();
3807
3804
std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
3808
3805
ur_event_handle_t UREvent = nullptr ;
3809
- Command::waitForEvents (MQueue, EventImpls, UREvent);
3806
+ Command::waitForEvents (MQueue. get () , EventImpls, UREvent);
3810
3807
MEvent->setHandle (UREvent);
3811
3808
3812
3809
auto CheckAndFindAlloca = [](Requirement *Req, const DepDesc &Dep) {
@@ -3878,6 +3875,15 @@ void UpdateCommandBufferCommand::printDot(std::ostream &Stream) const {
3878
3875
/// Instrumentation hook for command-buffer update commands.
void UpdateCommandBufferCommand::emitInstrumentationData() {
  // Intentionally empty: this command emits no instrumentation data here.
}
3879
3876
/// Event-production query for command-buffer update commands.
///
/// \return always false.
bool UpdateCommandBufferCommand::producesPiEvent() const {
  return false;
}
3880
3877
3878
+ CGHostTask::CGHostTask (std::shared_ptr<HostTask> HostTask,
3879
+ detail::queue_impl *Queue,
3880
+ std::shared_ptr<detail::context_impl> Context,
3881
+ std::vector<ArgDesc> Args, CG::StorageInitHelper CGData,
3882
+ CGType Type, detail::code_location loc)
3883
+ : CG(Type, std::move(CGData), std::move(loc)),
3884
+ MHostTask(std::move(HostTask)),
3885
+ MQueue(Queue ? Queue->shared_from_this () : nullptr), MContext(Context),
3886
+ MArgs(std::move(Args)) {}
3881
3887
} // namespace detail
3882
3888
} // namespace _V1
3883
3889
} // namespace sycl
0 commit comments