Commit de85c35

[SYCL] Enable non-read-write memory object mapping in scheduler (#1390)
Previously, all memory objects were mapped with read-write access whenever they were requested on the host, so they remained fully accessible on the host until they were unmapped. This patch changes that behaviour: memory objects are now mapped with only the required access mode. If an incompatible access mode is requested on the host afterwards, the object is remapped.

Signed-off-by: Sergey Semenov <[email protected]>
1 parent f6d6baa commit de85c35
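
For illustration only, here is a minimal sketch (not part of the patch; the kernel name and sizes are made up) of the user-visible pattern this change affects. The first host accessor now maps the buffer for reading only; the later write request is incompatible with that mapping, so the scheduler unmaps and remaps the buffer, promoting plain write to read-write.

#include <CL/sycl.hpp>
using namespace cl::sycl;

int main() {
  queue Q; // default-selected device
  buffer<int, 1> Buf{range<1>{16}};

  // Initialize the buffer on the device.
  Q.submit([&](handler &Cgh) {
    auto Acc = Buf.get_access<access::mode::discard_write>(Cgh);
    Cgh.parallel_for<class InitKernel>(range<1>{16},
                                       [=](id<1> Idx) { Acc[Idx] = 1; });
  });

  {
    // With this patch the buffer is mapped on the host with read access only.
    auto HostRead = Buf.get_access<access::mode::read>();
    (void)HostRead[0];
  }

  // A write request cannot be served by the read-only mapping, so the buffer
  // is unmapped and remapped (write is mapped as read-write).
  auto HostWrite = Buf.get_access<access::mode::write>();
  HostWrite[0] = 2;
  return 0;
}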

File tree: 5 files changed, +178 -14 lines

sycl/source/detail/scheduler/commands.cpp

Lines changed: 6 additions & 5 deletions
@@ -828,9 +828,11 @@ void ReleaseCommand::printDot(std::ostream &Stream) const {
 }
 
 MapMemObject::MapMemObject(AllocaCommandBase *SrcAllocaCmd, Requirement Req,
-                           void **DstPtr, QueueImplPtr Queue)
+                           void **DstPtr, QueueImplPtr Queue,
+                           access::mode MapMode)
     : Command(CommandType::MAP_MEM_OBJ, std::move(Queue)),
-      MSrcAllocaCmd(SrcAllocaCmd), MSrcReq(std::move(Req)), MDstPtr(DstPtr) {
+      MSrcAllocaCmd(SrcAllocaCmd), MSrcReq(std::move(Req)), MDstPtr(DstPtr),
+      MMapMode(MapMode) {
   emitInstrumentationDataProxy();
 }
 
@@ -861,9 +863,8 @@ cl_int MapMemObject::enqueueImp() {
   RT::PiEvent &Event = MEvent->getHandleRef();
   *MDstPtr = MemoryManager::map(
       MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), MQueue,
-      MSrcReq.MAccessMode, MSrcReq.MDims, MSrcReq.MMemoryRange,
-      MSrcReq.MAccessRange, MSrcReq.MOffset, MSrcReq.MElemSize,
-      std::move(RawEvents), Event);
+      MMapMode, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange,
+      MSrcReq.MOffset, MSrcReq.MElemSize, std::move(RawEvents), Event);
   return CL_SUCCESS;
 }

sycl/source/detail/scheduler/commands.hpp

Lines changed: 2 additions & 1 deletion
@@ -321,7 +321,7 @@ class AllocaSubBufCommand : public AllocaCommandBase {
 class MapMemObject : public Command {
 public:
   MapMemObject(AllocaCommandBase *SrcAllocaCmd, Requirement Req, void **DstPtr,
-               QueueImplPtr Queue);
+               QueueImplPtr Queue, access::mode MapMode);
 
   void printDot(std::ostream &Stream) const final;
   const Requirement *getRequirement() const final { return &MSrcReq; }
@@ -333,6 +333,7 @@ class MapMemObject : public Command {
   AllocaCommandBase *MSrcAllocaCmd = nullptr;
   Requirement MSrcReq;
   void **MDstPtr = nullptr;
+  access::mode MMapMode;
 };
 
 class UnMapMemObject : public Command {

sycl/source/detail/scheduler/graph_builder.cpp

Lines changed: 78 additions & 8 deletions
@@ -48,6 +48,24 @@ static bool IsSuitableSubReq(const Requirement *Req) {
   return Req->MIsSubBuffer;
 }
 
+// Checks if the required access mode is allowed under the current one
+static bool isAccessModeAllowed(access::mode Required, access::mode Current) {
+  switch (Current) {
+  case access::mode::read:
+    return (Required == Current);
+  case access::mode::write:
+    assert(false && "Write only access is expected to be mapped as read_write");
+    return (Required == Current || Required == access::mode::discard_write);
+  case access::mode::read_write:
+  case access::mode::atomic:
+  case access::mode::discard_write:
+  case access::mode::discard_read_write:
+    return true;
+  }
+  assert(false);
+  return false;
+}
+
 Scheduler::GraphBuilder::GraphBuilder() {
   if (const char *EnvVarCStr = SYCLConfig<SYCL_PRINT_EXECUTION_GRAPH>::get()) {
     std::string GraphPrintOpts(EnvVarCStr);
@@ -199,7 +217,8 @@ UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd(
 // Takes linked alloca commands. Makes AllocaCmdDst command active using map
 // or unmap operation.
 static Command *insertMapUnmapForLinkedCmds(AllocaCommandBase *AllocaCmdSrc,
-                                            AllocaCommandBase *AllocaCmdDst) {
+                                            AllocaCommandBase *AllocaCmdDst,
+                                            access::mode MapMode) {
   assert(AllocaCmdSrc->MLinkedAllocaCmd == AllocaCmdDst &&
          "Expected linked alloca commands");
   assert(AllocaCmdSrc->MIsActive &&
@@ -215,9 +234,9 @@ static Command *insertMapUnmapForLinkedCmds(AllocaCommandBase *AllocaCmdSrc,
     return UnMapCmd;
   }
 
-  MapMemObject *MapCmd =
-      new MapMemObject(AllocaCmdSrc, *AllocaCmdSrc->getRequirement(),
-                       &AllocaCmdDst->MMemAllocation, AllocaCmdSrc->getQueue());
+  MapMemObject *MapCmd = new MapMemObject(
+      AllocaCmdSrc, *AllocaCmdSrc->getRequirement(),
+      &AllocaCmdDst->MMemAllocation, AllocaCmdSrc->getQueue(), MapMode);
 
   std::swap(AllocaCmdSrc->MIsActive, AllocaCmdDst->MIsActive);
 
@@ -274,7 +293,12 @@ Command *Scheduler::GraphBuilder::insertMemoryMove(MemObjRecord *Record,
   Command *NewCmd = nullptr;
 
   if (AllocaCmdSrc->MLinkedAllocaCmd == AllocaCmdDst) {
-    NewCmd = insertMapUnmapForLinkedCmds(AllocaCmdSrc, AllocaCmdDst);
+    // Map write only as read-write
+    access::mode MapMode = Req->MAccessMode;
+    if (MapMode == access::mode::write)
+      MapMode = access::mode::read_write;
+    NewCmd = insertMapUnmapForLinkedCmds(AllocaCmdSrc, AllocaCmdDst, MapMode);
+    Record->MHostAccess = MapMode;
   } else {
 
     // Full copy of buffer is needed to avoid loss of data that may be caused
@@ -295,6 +319,43 @@ Command *Scheduler::GraphBuilder::insertMemoryMove(MemObjRecord *Record,
   return NewCmd;
 }
 
+Command *Scheduler::GraphBuilder::remapMemoryObject(
+    MemObjRecord *Record, Requirement *Req, AllocaCommandBase *HostAllocaCmd) {
+  assert(HostAllocaCmd->getQueue()->is_host() &&
+         "Host alloca command expected");
+  assert(HostAllocaCmd->MIsActive && "Active alloca command expected");
+
+  AllocaCommandBase *LinkedAllocaCmd = HostAllocaCmd->MLinkedAllocaCmd;
+  assert(LinkedAllocaCmd && "Linked alloca command expected");
+
+  std::set<Command *> Deps = findDepsForReq(Record, Req, Record->MCurContext);
+
+  UnMapMemObject *UnMapCmd = new UnMapMemObject(
+      LinkedAllocaCmd, *LinkedAllocaCmd->getRequirement(),
+      &HostAllocaCmd->MMemAllocation, LinkedAllocaCmd->getQueue());
+
+  // Map write only as read-write
+  access::mode MapMode = Req->MAccessMode;
+  if (MapMode == access::mode::write)
+    MapMode = access::mode::read_write;
+  MapMemObject *MapCmd = new MapMemObject(
+      LinkedAllocaCmd, *LinkedAllocaCmd->getRequirement(),
+      &HostAllocaCmd->MMemAllocation, LinkedAllocaCmd->getQueue(), MapMode);
+
+  for (Command *Dep : Deps) {
+    UnMapCmd->addDep(DepDesc{Dep, UnMapCmd->getRequirement(), LinkedAllocaCmd});
+    Dep->addUser(UnMapCmd);
+  }
+
+  MapCmd->addDep(DepDesc{UnMapCmd, MapCmd->getRequirement(), HostAllocaCmd});
+  UnMapCmd->addUser(MapCmd);
+
+  updateLeaves(Deps, Record, access::mode::read_write);
+  addNodeToLeaves(Record, MapCmd, access::mode::read_write);
+  Record->MHostAccess = MapMode;
+  return MapCmd;
+}
+
 // The function adds copy operation of the up to date'st memory to the memory
 // pointed by Req.
 Command *Scheduler::GraphBuilder::addCopyBack(Requirement *Req) {
@@ -349,8 +410,11 @@ Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req,
   AllocaCommandBase *HostAllocaCmd =
       getOrCreateAllocaForReq(Record, Req, HostQueue);
 
-  if (!sameCtx(HostAllocaCmd->getQueue()->getContextImplPtr(),
-               Record->MCurContext))
+  if (sameCtx(HostAllocaCmd->getQueue()->getContextImplPtr(),
+              Record->MCurContext)) {
+    if (!isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess))
+      remapMemoryObject(Record, Req, HostAllocaCmd);
+  } else
     insertMemoryMove(Record, Req, HostQueue);
 
   Command *UpdateHostAccCmd = insertUpdateHostReqCmd(Record, Req, HostQueue);
@@ -600,7 +664,13 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr<detail::CG> CommandGroup,
   AllocaCommandBase *AllocaCmd = getOrCreateAllocaForReq(Record, Req, Queue);
   // If there is alloca command we need to check if the latest memory is in
   // required context.
-  if (!sameCtx(Queue->getContextImplPtr(), Record->MCurContext)) {
+  if (sameCtx(Queue->getContextImplPtr(), Record->MCurContext)) {
+    // If the memory is already in the required host context, check if the
+    // required access mode is valid, remap if not.
+    if (Record->MCurContext->is_host() &&
+        !isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess))
+      remapMemoryObject(Record, Req, AllocaCmd);
+  } else {
     // Cannot directly copy memory from OpenCL device to OpenCL device -
     // create two copies: device->host and host->device.
     if (!Queue->is_host() && !Record->MCurContext->is_host())
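
As a reading aid for the hunks above, the following standalone sketch restates the decision the graph builder now makes for requests that hit an already-mapped host allocation. The helper names (canReuseMapping, remapMode) are made up for illustration and are not part of the patch; the rules mirror isAccessModeAllowed plus the write-to-read-write promotion.

#include <CL/sycl.hpp>
#include <cassert>
using cl::sycl::access::mode;

// True if a request with mode Required can be served by a mapping created
// with mode Current. Write-only mappings never occur here, because the
// scheduler promotes plain write to read-write before mapping.
static bool canReuseMapping(mode Required, mode Current) {
  switch (Current) {
  case mode::read:
    return Required == mode::read;
  case mode::read_write:
  case mode::atomic:
  case mode::discard_write:
  case mode::discard_read_write:
    return true;
  default:
    return false;
  }
}

// Mode used when the object has to be remapped for an incompatible request.
static mode remapMode(mode Required) {
  return Required == mode::write ? mode::read_write : Required;
}

int main() {
  // A read-only mapping serves further reads...
  assert(canReuseMapping(mode::read, mode::read));
  // ...but not writes, which trigger an unmap/remap as read-write.
  assert(!canReuseMapping(mode::write, mode::read));
  assert(remapMode(mode::write) == mode::read_write);
  // A read-write mapping serves any later host request.
  assert(canReuseMapping(mode::discard_write, mode::read_write));
  return 0;
}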

sycl/source/detail/scheduler/scheduler.hpp

Lines changed: 9 additions & 0 deletions
@@ -50,6 +50,10 @@ struct MemObjRecord {
   // The context which has the latest state of the memory object.
   ContextImplPtr MCurContext;
 
+  // The mode this object can be accessed with from the host context.
+  // Valid only if the current context is host.
+  access::mode MHostAccess = access::mode::read_write;
+
   // The flag indicates that the content of the memory object was/will be
   // modified. Used while deciding if copy back needed.
   bool MMemModified = false;
@@ -171,6 +175,11 @@ class Scheduler {
   Command *insertMemoryMove(MemObjRecord *Record, Requirement *Req,
                             const QueueImplPtr &Queue);
 
+  // Inserts commands required to remap the memory object to its current host
+  // context so that the required access mode becomes valid.
+  Command *remapMemoryObject(MemObjRecord *Record, Requirement *Req,
+                             AllocaCommandBase *HostAllocaCmd);
+
   UpdateHostRequirementCommand *
   insertUpdateHostReqCmd(MemObjRecord *Record, Requirement *Req,
                          const QueueImplPtr &Queue);
Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: env SYCL_PI_TRACE=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1 %CPU_CHECK_PLACEHOLDER
+#include <CL/sycl.hpp>
+#include <cassert>
+#include <cstddef>
+
+using namespace cl::sycl;
+
+class Foo;
+class Bar;
+
+// This test checks that memory objects are remapped on requesting an access mode
+// incompatible with the current mapping. Write access is mapped as read-write.
+int main() {
+  queue Q;
+
+  std::size_t Size = 64;
+  range<1> Range{Size};
+  buffer<int, 1> BufA{Range};
+  buffer<int, 1> BufB{Range};
+
+  Q.submit([&](handler &Cgh) {
+    auto AccA = BufA.get_access<access::mode::read_write>(Cgh);
+    auto AccB = BufB.get_access<access::mode::read_write>(Cgh);
+    Cgh.parallel_for<Foo>(Range, [=](id<1> Idx) {
+      AccA[Idx] = Idx[0];
+      AccB[Idx] = Idx[0];
+    });
+  });
+
+  {
+    // Check access mode flags
+    // CHECK: piEnqueueMemBufferMap
+    // CHECK-NEXT: :
+    // CHECK-NEXT: :
+    // CHECK-NEXT: :
+    // CHECK-NEXT: : 1
+    // CHECK: piEnqueueMemBufferMap
+    // CHECK-NEXT: :
+    // CHECK-NEXT: :
+    // CHECK-NEXT: :
+    // CHECK-NEXT: : 1
+    auto AccA = BufA.get_access<access::mode::read>();
+    auto AccB = BufB.get_access<access::mode::read>();
+    for (std::size_t I = 0; I < Size; ++I) {
+      assert(AccA[I] == I);
+      assert(AccB[I] == I);
+    }
+  }
+  {
+    // CHECK: piEnqueueMemUnmap
+    // CHECK: piEnqueueMemBufferMap
+    // CHECK-NEXT: :
+    // CHECK-NEXT: :
+    // CHECK-NEXT: :
+    // CHECK-NEXT: : 3
+    auto AccA = BufA.get_access<access::mode::write>();
+    for (std::size_t I = 0; I < Size; ++I)
+      AccA[I] = 2 * I;
+  }
+
+  queue HostQ{host_selector()};
+  // CHECK: piEnqueueMemUnmap
+  // CHECK: piEnqueueMemBufferMap
+  // CHECK-NEXT: :
+  // CHECK-NEXT: :
+  // CHECK-NEXT: :
+  // CHECK-NEXT: : 3
+  HostQ.submit([&](handler &Cgh) {
+    auto AccB = BufB.get_access<access::mode::write>(Cgh);
+    Cgh.parallel_for<Bar>(Range, [=](id<1> Idx) {
+      AccB[Idx] = 2 * Idx[0];
+    });
+  });
+
+  // CHECK-NOT: piEnqueueMemBufferMap
+  auto AccA = BufA.get_access<access::mode::read>();
+  auto AccB = BufB.get_access<access::mode::read>();
+  for (std::size_t I = 0; I < Size; ++I) {
+    assert(AccA[I] == 2 * I);
+    assert(AccB[I] == 2 * I);
+  }
+}
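
A note on the values matched by the CHECK lines in this test: SYCL_PI_TRACE=1 prints each argument of a PI call on its own line, and the value matched after the three skipped argument lines is the map-flags bitfield of piEnqueueMemBufferMap (this is what the "Check access mode flags" comment refers to). Assuming the PI map flags take the usual OpenCL bit values, which this diff does not itself show, the expected values decode as in the sketch below.

// Hypothetical decoding of the traced flag values, assuming OpenCL-style bits.
constexpr unsigned MapRead = 0x1;                     // CL_MAP_READ
constexpr unsigned MapWrite = 0x2;                    // CL_MAP_WRITE
constexpr unsigned ReadOnlyMap = MapRead;             // ": 1" - read-only host access
constexpr unsigned ReadWriteMap = MapRead | MapWrite; // ": 3" - after remapping for write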
