Skip to content

Commit 5b0952c

Browse files
jbrodmanbader
authored andcommitted
[SYCL][USM] Enable USM on HOST device (#579)
Signed-off-by: James Brodman <[email protected]>
1 parent 3b20615 commit 5b0952c

21 files changed

+181
-99
lines changed

sycl/include/CL/sycl/detail/aligned_allocator.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ template <typename T> class aligned_allocator {
3333
public:
3434
template <typename U> struct rebind { typedef aligned_allocator<U> other; };
3535

36+
aligned_allocator() = default;
37+
~aligned_allocator() = default;
38+
39+
explicit aligned_allocator(size_t Alignment) : MAlignment(Alignment) {}
40+
3641
// Construct an object
3742
void construct(pointer Ptr, const_reference Val) {
3843
new (Ptr) value_type(Val);

sycl/include/CL/sycl/detail/memory_manager.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ class MemoryManager {
121121
void *MappedPtr, std::vector<RT::PiEvent> DepEvents,
122122
bool UseExclusiveQueue, RT::PiEvent &OutEvent);
123123

124-
static void copy_usm(void *SrcMem, QueueImplPtr Queue, size_t Len,
124+
static void copy_usm(const void *SrcMem, QueueImplPtr Queue, size_t Len,
125125
void *DstMem, std::vector<RT::PiEvent> DepEvents,
126126
bool UseExclusiveQueue, RT::PiEvent &OutEvent);
127127

sycl/include/CL/sycl/detail/queue_impl.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,10 @@ class queue_impl {
221221
return m_PropList.get_property<propertyT>();
222222
}
223223

224-
event memset(void *Ptr, int Value, size_t Count);
225-
event memcpy(void *Dest, const void *Src, size_t Count);
224+
event memset(std::shared_ptr<queue_impl> Impl, void *Ptr, int Value,
225+
size_t Count);
226+
event memcpy(std::shared_ptr<queue_impl> Impl, void *Dest, const void *Src,
227+
size_t Count);
226228
event mem_advise(const void *Ptr, size_t Length, int Advice);
227229

228230
private:

sycl/include/CL/sycl/ordered_queue.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,11 @@ class ordered_queue {
105105
}
106106

107107
event memset(void* ptr, int value, size_t count) {
108-
return impl->memset(ptr, value, count);
108+
return impl->memset(impl, ptr, value, count);
109109
}
110110

111111
event memcpy(void* dest, const void* src, size_t count) {
112-
return impl->memcpy(dest, src, count);
112+
return impl->memcpy(impl, dest, src, count);
113113
}
114114

115115
private:

sycl/include/CL/sycl/queue.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,11 @@ class queue {
105105
}
106106

107107
event memset(void* Ptr, int Value, size_t Count) {
108-
return impl->memset(Ptr, Value, Count);
108+
return impl->memset(impl, Ptr, Value, Count);
109109
}
110110

111111
event memcpy(void* Dest, const void* Src, size_t Count) {
112-
return impl->memcpy(Dest, Src, Count);
112+
return impl->memcpy(impl, Dest, Src, Count);
113113
}
114114

115115
event mem_advise(const void *Ptr, size_t Length, int Advice) {

sycl/source/detail/memory_manager.cpp

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -463,29 +463,41 @@ void MemoryManager::unmap(SYCLMemObjI *SYCLMemObj, void *Mem,
463463
DepEvents.empty() ? nullptr : &DepEvents[0], &OutEvent));
464464
}
465465

466-
void MemoryManager::copy_usm(void *SrcMem, QueueImplPtr SrcQueue, size_t Len,
466+
void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, size_t Len,
467467
void *DstMem, std::vector<RT::PiEvent> DepEvents,
468468
bool UseExclusiveQueue, RT::PiEvent &OutEvent) {
469-
RT::PiQueue Queue = UseExclusiveQueue
470-
? SrcQueue->getExclusiveQueueHandleRef()
471-
: SrcQueue->getHandleRef();
472-
473469
sycl::context Context = SrcQueue->get_context();
474-
std::shared_ptr<usm::USMDispatcher> USMDispatch =
475-
getSyclObjImpl(Context)->getUSMDispatch();
476-
PI_CHECK(USMDispatch->enqueueMemcpy(Queue,
477-
/* blocking */ false, DstMem, SrcMem, Len, DepEvents.size(),
478-
&DepEvents[0], &OutEvent));
470+
471+
if (Context.is_host()) {
472+
std::memcpy(DstMem, SrcMem, Len);
473+
} else {
474+
RT::PiQueue Queue = UseExclusiveQueue
475+
? SrcQueue->getExclusiveQueueHandleRef()
476+
: SrcQueue->getHandleRef();
477+
478+
std::shared_ptr<usm::USMDispatcher> USMDispatch =
479+
getSyclObjImpl(Context)->getUSMDispatch();
480+
PI_CHECK(USMDispatch->enqueueMemcpy(Queue,
481+
/* blocking */ false, DstMem, SrcMem,
482+
Len, DepEvents.size(), &DepEvents[0],
483+
&OutEvent));
484+
}
479485
}
480486

481487
void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length,
482488
int Pattern, std::vector<RT::PiEvent> DepEvents,
483489
RT::PiEvent &OutEvent) {
484490
sycl::context Context = Queue->get_context();
485-
std::shared_ptr<usm::USMDispatcher> USMDispatch =
486-
getSyclObjImpl(Context)->getUSMDispatch();
487-
PI_CHECK(USMDispatch->enqueueMemset(Queue->getHandleRef(),
488-
Mem, Pattern, Length, DepEvents.size(), &DepEvents[0], &OutEvent));
491+
492+
if (Context.is_host()) {
493+
std::memset(Mem, Pattern, Length);
494+
} else {
495+
std::shared_ptr<usm::USMDispatcher> USMDispatch =
496+
getSyclObjImpl(Context)->getUSMDispatch();
497+
PI_CHECK(USMDispatch->enqueueMemset(Queue->getHandleRef(), Mem, Pattern,
498+
Length, DepEvents.size(), &DepEvents[0],
499+
&OutEvent));
500+
}
489501
}
490502

491503
} // namespace detail

sycl/source/detail/queue_impl.cpp

Lines changed: 28 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@
88

99
#include <CL/sycl/context.hpp>
1010
#include <CL/sycl/detail/clusm.hpp>
11+
#include <CL/sycl/detail/memory_manager.hpp>
1112
#include <CL/sycl/detail/pi.hpp>
1213
#include <CL/sycl/detail/queue_impl.hpp>
1314
#include <CL/sycl/detail/usm_dispatch.hpp>
1415
#include <CL/sycl/device.hpp>
1516

17+
#include <cstring>
18+
1619
namespace cl {
1720
namespace sycl {
1821
namespace detail {
@@ -33,43 +36,46 @@ template <> device queue_impl::get_info<info::queue::device>() const {
3336
return get_device();
3437
}
3538

36-
event queue_impl::memset(void *Ptr, int Value, size_t Count) {
39+
event queue_impl::memset(std::shared_ptr<detail::queue_impl> Impl, void *Ptr,
40+
int Value, size_t Count) {
3741
context Context = get_context();
38-
std::shared_ptr<usm::USMDispatcher> USMDispatch =
39-
getSyclObjImpl(Context)->getUSMDispatch();
40-
cl_event Event;
42+
RT::PiEvent Event = nullptr;
43+
MemoryManager::fill_usm(Ptr, Impl, Count, Value, /*DepEvents*/ {}, Event);
4144

42-
PI_CHECK(USMDispatch->enqueueMemset(getHandleRef(), Ptr, Value, Count,
43-
/* sizeof waitlist */ 0, nullptr,
44-
reinterpret_cast<pi_event *>(&Event)));
45+
if (Context.is_host())
46+
return event();
4547

46-
return event(Event, Context);
48+
return event(pi::cast<cl_event>(Event), Context);
4749
}
4850

49-
event queue_impl::memcpy(void *Dest, const void *Src, size_t Count) {
51+
event queue_impl::memcpy(std::shared_ptr<detail::queue_impl> Impl, void *Dest,
52+
const void *Src, size_t Count) {
5053
context Context = get_context();
51-
std::shared_ptr<usm::USMDispatcher> USMDispatch =
52-
getSyclObjImpl(Context)->getUSMDispatch();
53-
cl_event Event;
54+
RT::PiEvent Event = nullptr;
55+
// Not entirely sure when UseExclusiveQueue should be true
56+
MemoryManager::copy_usm(Src, Impl, Count, Dest, /*DepEvents*/ {},
57+
/*ExclusiveQueue*/ false, Event);
5458

55-
PI_CHECK(USMDispatch->enqueueMemcpy(getHandleRef(),
56-
/* blocking */ false, Dest, Src, Count,
57-
/* sizeof waitlist */ 0, nullptr,
58-
reinterpret_cast<pi_event *>(&Event)));
59+
if (Context.is_host())
60+
return event();
5961

60-
return event(Event, Context);
62+
return event(pi::cast<cl_event>(Event), Context);
6163
}
6264

6365
event queue_impl::mem_advise(const void *Ptr, size_t Length, int Advice) {
6466
context Context = get_context();
67+
if (Context.is_host()) {
68+
return event();
69+
}
70+
71+
// non-Host device
6572
std::shared_ptr<usm::USMDispatcher> USMDispatch =
66-
getSyclObjImpl(Context)->getUSMDispatch();
67-
cl_event Event;
73+
getSyclObjImpl(Context)->getUSMDispatch();
74+
RT::PiEvent Event = nullptr;
6875

69-
USMDispatch->memAdvise(getHandleRef(), Ptr, Length, Advice,
70-
reinterpret_cast<pi_event *>(&Event));
76+
USMDispatch->memAdvise(getHandleRef(), Ptr, Length, Advice, &Event);
7177

72-
return event(Event, Context);
78+
return event(pi::cast<cl_event>(Event), Context);
7379
}
7480
} // namespace detail
7581
} // namespace sycl

sycl/source/detail/usm/usm_impl.cpp

Lines changed: 97 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,14 @@
77
// ===--------------------------------------------------------------------=== //
88

99
#include <CL/sycl/context.hpp>
10+
#include <CL/sycl/detail/aligned_allocator.hpp>
11+
#include <CL/sycl/detail/os_util.hpp>
1012
#include <CL/sycl/detail/pi.hpp>
1113
#include <CL/sycl/device.hpp>
1214
#include <CL/sycl/usm.hpp>
1315

16+
#include <cstdlib>
17+
1418
namespace cl {
1519
namespace sycl {
1620

@@ -21,75 +25,112 @@ namespace usm {
2125

2226
void *alignedAlloc(size_t Alignment, size_t Size, const context &Ctxt,
2327
alloc Kind) {
24-
std::shared_ptr<context_impl> CtxImpl = detail::getSyclObjImpl(Ctxt);
25-
std::shared_ptr<USMDispatcher> Dispatch = CtxImpl->getUSMDispatch();
26-
pi_context C = CtxImpl->getHandleRef();
27-
pi_result Error;
2828
void *RetVal = nullptr;
29-
30-
switch (Kind) {
31-
case alloc::host: {
32-
RetVal = Dispatch->hostMemAlloc(C, nullptr, Size, Alignment, &Error);
33-
break;
34-
}
35-
case alloc::device:
36-
case alloc::shared:
37-
case alloc::unknown: {
38-
RetVal = nullptr;
39-
Error = PI_INVALID_VALUE;
40-
break;
41-
}
29+
if (Ctxt.is_host()) {
30+
if (!Alignment) {
31+
// worst case default
32+
Alignment = 128;
33+
}
34+
35+
aligned_allocator<char> Alloc(Alignment);
36+
try {
37+
RetVal = Alloc.allocate(Size);
38+
} catch (const std::bad_alloc &) {
39+
// Conform with Specification behavior
40+
RetVal = nullptr;
41+
}
42+
} else {
43+
std::shared_ptr<context_impl> CtxImpl = detail::getSyclObjImpl(Ctxt);
44+
std::shared_ptr<USMDispatcher> Dispatch = CtxImpl->getUSMDispatch();
45+
pi_context C = CtxImpl->getHandleRef();
46+
pi_result Error;
47+
48+
switch (Kind) {
49+
case alloc::host: {
50+
RetVal = Dispatch->hostMemAlloc(C, nullptr, Size, Alignment, &Error);
51+
break;
52+
}
53+
case alloc::device:
54+
case alloc::shared:
55+
case alloc::unknown: {
56+
RetVal = nullptr;
57+
Error = PI_INVALID_VALUE;
58+
break;
59+
}
60+
}
61+
62+
// Error is for debugging purposes.
63+
// The spec wants a nullptr returned, not an exception.
64+
if (Error != PI_SUCCESS)
65+
return nullptr;
4266
}
43-
44-
// Error is for debugging purposes.
45-
// The spec wants a nullptr returned, not an exception.
46-
if (Error != PI_SUCCESS) return nullptr;
47-
4867
return RetVal;
4968
}
5069

5170
void *alignedAlloc(size_t Alignment, size_t Size, const context &Ctxt,
5271
const device &Dev, alloc Kind) {
53-
std::shared_ptr<context_impl> CtxImpl = detail::getSyclObjImpl(Ctxt);
54-
std::shared_ptr<USMDispatcher> Dispatch = CtxImpl->getUSMDispatch();
55-
pi_context C = CtxImpl->getHandleRef();
56-
pi_result Error;
57-
pi_device Id;
5872
void *RetVal = nullptr;
59-
60-
switch (Kind) {
61-
case alloc::device: {
62-
Id = detail::getSyclObjImpl(Dev)->getHandleRef();
63-
RetVal = Dispatch->deviceMemAlloc(C, Id, nullptr, Size, Alignment, &Error);
64-
break;
73+
if (Ctxt.is_host()) {
74+
if (!Alignment) {
75+
// worst case default
76+
Alignment = 128;
77+
}
78+
79+
aligned_allocator<char> Alloc(Alignment);
80+
try {
81+
RetVal = Alloc.allocate(Size);
82+
} catch (const std::bad_alloc &) {
83+
// Conform with Specification behavior
84+
RetVal = nullptr;
85+
}
86+
} else {
87+
std::shared_ptr<context_impl> CtxImpl = detail::getSyclObjImpl(Ctxt);
88+
std::shared_ptr<USMDispatcher> Dispatch = CtxImpl->getUSMDispatch();
89+
pi_context C = CtxImpl->getHandleRef();
90+
pi_result Error;
91+
pi_device Id;
92+
93+
switch (Kind) {
94+
case alloc::device: {
95+
Id = detail::getSyclObjImpl(Dev)->getHandleRef();
96+
RetVal =
97+
Dispatch->deviceMemAlloc(C, Id, nullptr, Size, Alignment, &Error);
98+
break;
99+
}
100+
case alloc::shared: {
101+
Id = detail::getSyclObjImpl(Dev)->getHandleRef();
102+
RetVal =
103+
Dispatch->sharedMemAlloc(C, Id, nullptr, Size, Alignment, &Error);
104+
break;
105+
}
106+
case alloc::host:
107+
case alloc::unknown: {
108+
RetVal = nullptr;
109+
Error = PI_INVALID_VALUE;
110+
break;
111+
}
112+
}
113+
114+
// Error is for debugging purposes.
115+
// The spec wants a nullptr returned, not an exception.
116+
if (Error != PI_SUCCESS)
117+
return nullptr;
65118
}
66-
case alloc::shared: {
67-
Id = detail::getSyclObjImpl(Dev)->getHandleRef();
68-
RetVal = Dispatch->sharedMemAlloc(C, Id, nullptr, Size, Alignment, &Error);
69-
break;
70-
}
71-
case alloc::host:
72-
case alloc::unknown: {
73-
RetVal = nullptr;
74-
Error = PI_INVALID_VALUE;
75-
break;
76-
}
77-
}
78-
79-
// Error is for debugging purposes.
80-
// The spec wants a nullptr returned, not an exception.
81-
if (Error != PI_SUCCESS) return nullptr;
82-
83119
return RetVal;
84120
}
85121

86122
void free(void *Ptr, const context &Ctxt) {
87-
std::shared_ptr<context_impl> CtxImpl = detail::getSyclObjImpl(Ctxt);
88-
std::shared_ptr<USMDispatcher> Dispatch = CtxImpl->getUSMDispatch();
89-
pi_context C = CtxImpl->getHandleRef();
90-
pi_result Error = Dispatch->memFree(C, Ptr);
91-
92-
PI_CHECK(Error);
123+
if (Ctxt.is_host()) {
124+
// need to use alignedFree here for Windows
125+
detail::OSUtil::alignedFree(Ptr);
126+
} else {
127+
std::shared_ptr<context_impl> CtxImpl = detail::getSyclObjImpl(Ctxt);
128+
std::shared_ptr<USMDispatcher> Dispatch = CtxImpl->getUSMDispatch();
129+
pi_context C = CtxImpl->getHandleRef();
130+
pi_result Error = Dispatch->memFree(C, Ptr);
131+
132+
PI_CHECK(Error);
133+
}
93134
}
94135

95136
} // namespace usm

sycl/test/usm/allocator_vector.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// RUN: %clangxx -fsycl %s -o %t1.out
2+
// RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
23
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
34
// RUN: %GPU_RUN_PLACEHOLDER %t1.out
5+
46
//==---- allocator_vector.cpp - Allocator Container test -------------------==//
57
//
68
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

0 commit comments

Comments
 (0)