Skip to content

Commit 0b9a749

Browse files
authored
[SYCL][L0] Add support for pinned host memory. (#2633)
This change implements support for pinned host memory in the level_zero plugin. It allocates pinned host memory when PI_MEM_FLAGS_HOST_PTR_ALLOC is set. Pinned host memory is automatically accessible from the device through PCI. This property also ensures that memory map/unmap operations are free of cost and that the buffer is optimized for frequent accesses from the host. A test for this change is added in a separate PR, intel/llvm-test-suite#83. Signed-off-by: rbegam <[email protected]>
1 parent 8cfcdb2 commit 0b9a749

File tree

1 file changed

+27
-11
lines changed

1 file changed

+27
-11
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2092,7 +2092,20 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
20922092
Context->Devices[0]->ZeDeviceProperties.flags &
20932093
ZE_DEVICE_PROPERTY_FLAG_INTEGRATED;
20942094

2095-
if (DeviceIsIntegrated) {
2095+
// Having PI_MEM_FLAGS_HOST_PTR_ALLOC for buffer requires allocation of
2096+
// pinned host memory which then becomes automatically accessible from
2097+
// discrete devices through PCI. This property ensures that the memory
2098+
// map/unmap operations are free of cost and the buffer is optimized for
2099+
// frequent accesses from the host giving improved performance.
2100+
// see:
2101+
// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/UsePinnedMemoryProperty/UsePinnedMemoryPropery.adoc
2102+
bool AllocHostPtr = Flags & PI_MEM_FLAGS_HOST_PTR_ALLOC;
2103+
2104+
if (AllocHostPtr) {
2105+
PI_ASSERT(HostPtr == nullptr, PI_INVALID_VALUE);
2106+
}
2107+
2108+
if (AllocHostPtr || DeviceIsIntegrated) {
20962109
ze_host_mem_alloc_desc_t ZeDesc = {};
20972110
ZeDesc.flags = 0;
20982111

@@ -2106,6 +2119,7 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
21062119
ZE_CALL(
21072120
zeMemAllocDevice(Context->ZeContext, &ZeDesc, Size, 1, ZeDevice, &Ptr));
21082121
}
2122+
21092123
if (HostPtr) {
21102124
if ((Flags & PI_MEM_FLAGS_HOST_PTR_USE) != 0 ||
21112125
(Flags & PI_MEM_FLAGS_HOST_PTR_COPY) != 0) {
@@ -2133,7 +2147,7 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
21332147
*RetMem = new _pi_buffer(
21342148
Context, pi_cast<char *>(Ptr) /* Level Zero Memory Handle */,
21352149
HostPtrOrNull, nullptr, 0, 0,
2136-
DeviceIsIntegrated /* Flag indicating allocation in host memory */);
2150+
AllocHostPtr || DeviceIsIntegrated /* allocation in host memory */);
21372151
} catch (const std::bad_alloc &) {
21382152
return PI_OUT_OF_HOST_MEMORY;
21392153
} catch (...) {
@@ -4272,8 +4286,8 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer,
42724286
void **RetMap) {
42734287

42744288
// TODO: we don't implement read-only or write-only, always read-write.
4275-
// assert((map_flags & CL_MAP_READ) != 0);
4276-
// assert((map_flags & CL_MAP_WRITE) != 0);
4289+
// assert((map_flags & PI_MAP_READ) != 0);
4290+
// assert((map_flags & PI_MAP_WRITE) != 0);
42774291
PI_ASSERT(Buffer, PI_INVALID_MEM_OBJECT);
42784292
PI_ASSERT(Queue, PI_INVALID_QUEUE);
42794293

@@ -4296,17 +4310,18 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer,
42964310

42974311
// TODO: Level Zero is missing the memory "mapping" capabilities, so we are
42984312
// left to doing new memory allocation and a copy (read) on discrete devices.
4299-
// On integrated devices we have allocated the buffer in host memory
4300-
// so no actions are needed here except for synchronizing on incoming events
4301-
// and doing a host-to-host copy if a host pointer had been supplied
4302-
// during buffer creation.
4313+
// For pinned host memory and integrated devices, we have allocated the
4314+
// buffer in host memory so no actions are needed here except for
4315+
// synchronizing on incoming events. A host-to-host copy is done if a host
4316+
// pointer had been supplied during buffer creation on integrated devices.
43034317
//
43044318
// TODO: for discrete, check if the input buffer is already allocated
43054319
// in shared memory and thus is accessible from the host as is.
43064320
// Can we get SYCL RT to predict/allocate in shared memory
43074321
// from the beginning?
4308-
//
4309-
// On integrated devices the buffer has been allocated in host memory.
4322+
4323+
// For pinned host memory and integrated devices the buffer has been
4324+
// allocated in host memory.
43104325
if (Buffer->OnHost) {
43114326
// Wait on incoming events before doing the copy
43124327
piEventsWait(NumEventsInWaitList, EventWaitList);
@@ -4406,7 +4421,8 @@ pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr,
44064421
(*Event)->CommandData =
44074422
(MemObj->OnHost ? nullptr : (MemObj->MapHostPtr ? nullptr : MappedPtr));
44084423

4409-
// On integrated devices the buffer is allocated in host memory.
4424+
// For pinned host memory and integrated devices the buffer is allocated
4425+
// in host memory.
44104426
if (MemObj->OnHost) {
44114427
// Wait on incoming events before doing the copy
44124428
piEventsWait(NumEventsInWaitList, EventWaitList);

0 commit comments

Comments
 (0)