Skip to content

Commit 92d35cd

Browse files
authored
[SYCL] Optimize memory transfers (#6213)
Optimize memory transfers by removing a redundant host to host data transfer when the data in a buffer is copied the first time from a user supplied const pointer on the host to a device that does not support host-unified memory.
1 parent 7827590 commit 92d35cd

File tree

6 files changed

+33
-29
lines changed

6 files changed

+33
-29
lines changed

sycl/include/sycl/detail/sycl_mem_obj_t.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,8 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {
312312

313313
bool isInterop() const;
314314

315+
bool isHostPointerReadOnly() const { return MHostPtrReadOnly; }
316+
315317
protected:
316318
// An allocateMem helper that determines which host ptr to use
317319
void determineHostPtr(const ContextImplPtr &Context, bool InitFromUserData,

sycl/source/detail/memory_manager.cpp

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -276,16 +276,12 @@ void *MemoryManager::allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj,
276276
void *MemoryManager::allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr,
277277
bool HostPtrReadOnly, size_t Size,
278278
const sycl::property_list &) {
279-
// Can return user pointer directly if it points to writable memory.
280-
if (UserPtr && HostPtrReadOnly == false)
279+
// Can return user pointer directly if it is not a nullptr.
280+
if (UserPtr)
281281
return UserPtr;
282282

283-
void *NewMem = MemObj->allocateHostMem();
284-
// Need to initialize new memory if user provides pointer to read only
285-
// memory.
286-
if (UserPtr && HostPtrReadOnly == true)
287-
std::memcpy((char *)NewMem, (char *)UserPtr, Size);
288-
return NewMem;
283+
return MemObj->allocateHostMem();
284+
;
289285
}
290286

291287
void *MemoryManager::allocateInteropMemObject(
@@ -312,8 +308,7 @@ static RT::PiMemFlags getMemObjCreationFlags(void *UserPtr,
312308
RT::PiMemFlags Result =
313309
HostPtrReadOnly ? PI_MEM_ACCESS_READ_ONLY : PI_MEM_FLAGS_ACCESS_RW;
314310
if (UserPtr)
315-
Result |= HostPtrReadOnly ? PI_MEM_FLAGS_HOST_PTR_COPY
316-
: PI_MEM_FLAGS_HOST_PTR_USE;
311+
Result |= PI_MEM_FLAGS_HOST_PTR_USE;
317312
return Result;
318313
}
319314

sycl/source/detail/scheduler/commands.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -833,10 +833,11 @@ const char *Command::getBlockReason() const {
833833

834834
AllocaCommandBase::AllocaCommandBase(CommandType Type, QueueImplPtr Queue,
835835
Requirement Req,
836-
AllocaCommandBase *LinkedAllocaCmd)
836+
AllocaCommandBase *LinkedAllocaCmd,
837+
bool IsConst)
837838
: Command(Type, Queue), MLinkedAllocaCmd(LinkedAllocaCmd),
838-
MIsLeaderAlloca(nullptr == LinkedAllocaCmd), MRequirement(std::move(Req)),
839-
MReleaseCmd(Queue, this) {
839+
MIsLeaderAlloca(nullptr == LinkedAllocaCmd), MIsConst(IsConst),
840+
MRequirement(std::move(Req)), MReleaseCmd(Queue, this) {
840841
MRequirement.MAccessMode = access::mode::read_write;
841842
emitInstrumentationDataProxy();
842843
}
@@ -868,9 +869,9 @@ bool AllocaCommandBase::supportsPostEnqueueCleanup() const { return false; }
868869

869870
AllocaCommand::AllocaCommand(QueueImplPtr Queue, Requirement Req,
870871
bool InitFromUserData,
871-
AllocaCommandBase *LinkedAllocaCmd)
872+
AllocaCommandBase *LinkedAllocaCmd, bool IsConst)
872873
: AllocaCommandBase(CommandType::ALLOCA, std::move(Queue), std::move(Req),
873-
LinkedAllocaCmd),
874+
LinkedAllocaCmd, IsConst),
874875
MInitFromUserData(InitFromUserData) {
875876
// Node event must be created before the dependent edge is added to this node,
876877
// so this call must be before the addDep() call.
@@ -949,7 +950,7 @@ AllocaSubBufCommand::AllocaSubBufCommand(QueueImplPtr Queue, Requirement Req,
949950
std::vector<Command *> &ToCleanUp)
950951
: AllocaCommandBase(CommandType::ALLOCA_SUB_BUF, std::move(Queue),
951952
std::move(Req),
952-
/*LinkedAllocaCmd*/ nullptr),
953+
/*LinkedAllocaCmd*/ nullptr, /*IsConst*/ false),
953954
MParentAlloca(ParentAlloca) {
954955
// Node event must be created before the dependent edge
955956
// is added to this node, so this call must be before

sycl/source/detail/scheduler/commands.hpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,7 @@ class ReleaseCommand : public Command {
364364
class AllocaCommandBase : public Command {
365365
public:
366366
AllocaCommandBase(CommandType Type, QueueImplPtr Queue, Requirement Req,
367-
AllocaCommandBase *LinkedAllocaCmd);
367+
AllocaCommandBase *LinkedAllocaCmd, bool IsConst);
368368

369369
ReleaseCommand *getReleaseCmd() { return &MReleaseCmd; }
370370

@@ -394,6 +394,8 @@ class AllocaCommandBase : public Command {
394394
/// Indicates that the command owns memory allocation in case of connected
395395
/// alloca command.
396396
bool MIsLeaderAlloca = true;
397+
// Indicates that the data in this allocation must not be modified
398+
bool MIsConst = false;
397399

398400
protected:
399401
Requirement MRequirement;
@@ -406,7 +408,8 @@ class AllocaCommand : public AllocaCommandBase {
406408
public:
407409
AllocaCommand(QueueImplPtr Queue, Requirement Req,
408410
bool InitFromUserData = true,
409-
AllocaCommandBase *LinkedAllocaCmd = nullptr);
411+
AllocaCommandBase *LinkedAllocaCmd = nullptr,
412+
bool IsConst = false);
410413

411414
void *getMemAllocation() const final { return MMemAllocation; }
412415
void printDot(std::ostream &Stream) const final;

sycl/source/detail/scheduler/graph_builder.cpp

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -627,17 +627,18 @@ DepDesc Scheduler::GraphBuilder::findDepForRecord(Command *Cmd,
627627

628628
// The function searches for the alloca command matching context and
629629
// requirement.
630-
AllocaCommandBase *
631-
Scheduler::GraphBuilder::findAllocaForReq(MemObjRecord *Record,
632-
const Requirement *Req,
633-
const ContextImplPtr &Context) {
634-
auto IsSuitableAlloca = [&Context, Req](AllocaCommandBase *AllocaCmd) {
630+
AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq(
631+
MemObjRecord *Record, const Requirement *Req, const ContextImplPtr &Context,
632+
bool AllowConst) {
633+
auto IsSuitableAlloca = [&Context, Req,
634+
AllowConst](AllocaCommandBase *AllocaCmd) {
635635
bool Res = sameCtx(AllocaCmd->getQueue()->getContextImplPtr(), Context);
636636
if (IsSuitableSubReq(Req)) {
637637
const Requirement *TmpReq = AllocaCmd->getRequirement();
638638
Res &= AllocaCmd->getType() == Command::CommandType::ALLOCA_SUB_BUF;
639639
Res &= TmpReq->MOffsetInBytes == Req->MOffsetInBytes;
640640
Res &= TmpReq->MSYCLMemObj->getSize() == Req->MSYCLMemObj->getSize();
641+
Res &= AllowConst || !AllocaCmd->MIsConst;
641642
}
642643
return Res;
643644
};
@@ -668,8 +669,8 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
668669
MemObjRecord *Record, const Requirement *Req, QueueImplPtr Queue,
669670
std::vector<Command *> &ToEnqueue) {
670671

671-
AllocaCommandBase *AllocaCmd =
672-
findAllocaForReq(Record, Req, Queue->getContextImplPtr());
672+
AllocaCommandBase *AllocaCmd = findAllocaForReq(
673+
Record, Req, Queue->getContextImplPtr(), /*AllowConst=*/false);
673674

674675
if (!AllocaCmd) {
675676
std::vector<Command *> ToCleanUp;
@@ -722,7 +723,8 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
722723
Scheduler::getInstance().getDefaultHostQueue();
723724
AllocaCommand *HostAllocaCmd = new AllocaCommand(
724725
DefaultHostQueue, FullReq, true /* InitFromUserData */,
725-
nullptr /* LinkedAllocaCmd */);
726+
nullptr /* LinkedAllocaCmd */,
727+
MemObj->isHostPointerReadOnly() /* IsConst */);
726728
Record->MAllocaCommands.push_back(HostAllocaCmd);
727729
Record->MWriteLeaves.push_back(HostAllocaCmd, ToEnqueue);
728730
++(HostAllocaCmd->MLeafCounter);
@@ -754,8 +756,8 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
754756
Queue->is_host() ? checkHostUnifiedMemory(Record->MCurContext)
755757
: HostUnifiedMemory;
756758
if (PinnedHostMemory || HostUnifiedMemoryOnNonHostDevice) {
757-
AllocaCommandBase *LinkedAllocaCmdCand =
758-
findAllocaForReq(Record, Req, Record->MCurContext);
759+
AllocaCommandBase *LinkedAllocaCmdCand = findAllocaForReq(
760+
Record, Req, Record->MCurContext, /*AllowConst=*/false);
759761

760762
// Cannot setup link if candidate is linked already
761763
if (LinkedAllocaCmdCand &&

sycl/source/detail/scheduler/scheduler.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -618,7 +618,8 @@ class Scheduler {
618618
/// Searches for suitable alloca in memory record.
619619
AllocaCommandBase *findAllocaForReq(MemObjRecord *Record,
620620
const Requirement *Req,
621-
const ContextImplPtr &Context);
621+
const ContextImplPtr &Context,
622+
bool AllowConst = true);
622623

623624
friend class Command;
624625

0 commit comments

Comments
 (0)