@@ -2088,21 +2088,25 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
2088
2088
Context->Devices [0 ]->ZeDeviceProperties .flags &
2089
2089
ZE_DEVICE_PROPERTY_FLAG_INTEGRATED;
2090
2090
2091
- // PI_MEM_FLAGS_HOST_PTR_ALLOC flag indicates allocation of pinned
2092
- // host memory which is accessible from device.
2091
+ // Setting PI_MEM_FLAGS_HOST_PTR_ALLOC on a buffer requires allocation of
2092
+ // pinned host memory which then becomes automatically accessible from
2093
+ // discrete devices through PCI. This property ensures that the memory
2094
+ // map/unmap operations are free of cost and the buffer is optimized for
2095
+ // frequent accesses from the host, giving improved performance.
2096
+ // see:
2097
+ // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/UsePinnedMemoryProperty/UsePinnedMemoryPropery.adoc
2093
2098
bool AllocHostPtr = Flags & PI_MEM_FLAGS_HOST_PTR_ALLOC;
2094
2099
2095
2100
if (AllocHostPtr) {
2096
- assert (HostPtr == nullptr &&
2097
- " PI_MEM_FLAGS_HOST_PTR_ALLOC cannot be used with host pointer" );
2101
+ PI_ASSERT (HostPtr == nullptr , PI_INVALID_VALUE);
2098
2102
2099
2103
ze_host_mem_alloc_desc_t ZeDesc = {};
2100
2104
ZeDesc.flags = 0 ;
2101
2105
2102
2106
ZE_CALL (zeMemAllocHost (Context->ZeContext , &ZeDesc, Size, 1 , &Ptr));
2103
- }
2104
2107
2105
- if (DeviceIsIntegrated) {
2108
+ } else if (DeviceIsIntegrated) {
2109
+
2106
2110
ze_host_mem_alloc_desc_t ZeDesc = {};
2107
2111
ZeDesc.flags = 0 ;
2108
2112
@@ -2144,7 +2148,7 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
2144
2148
*RetMem = new _pi_buffer (
2145
2149
Context, pi_cast<char *>(Ptr) /* Level Zero Memory Handle */ ,
2146
2150
HostPtrOrNull, nullptr , 0 , 0 ,
2147
- DeviceIsIntegrated /* Flag indicating allocation in host memory */ );
2151
+ AllocHostPtr || DeviceIsIntegrated /* allocation in host memory */ );
2148
2152
} catch (const std::bad_alloc &) {
2149
2153
return PI_OUT_OF_HOST_MEMORY;
2150
2154
} catch (...) {
@@ -4283,8 +4287,8 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer,
4283
4287
void **RetMap) {
4284
4288
4285
4289
// TODO: we don't implement read-only or write-only, always read-write.
4286
- // assert((map_flags & CL_MAP_READ ) != 0);
4287
- // assert((map_flags & CL_MAP_WRITE ) != 0);
4290
+ // assert((map_flags & PI_MAP_READ ) != 0);
4291
+ // assert((map_flags & PI_MAP_WRITE ) != 0);
4288
4292
PI_ASSERT (Buffer, PI_INVALID_MEM_OBJECT);
4289
4293
PI_ASSERT (Queue, PI_INVALID_QUEUE);
4290
4294
@@ -4307,17 +4311,18 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer,
4307
4311
4308
4312
// TODO: Level Zero is missing the memory "mapping" capabilities, so we are
4309
4313
// left with doing a new memory allocation and a copy (read) on discrete devices.
4310
- // On integrated devices we have allocated the buffer in host memory
4311
- // so no actions are needed here except for synchronizing on incoming events
4312
- // and doing a host-to-host copy if a host pointer had been supplied
4313
- // during buffer creation.
4314
+ // For pinned host memory and integrated devices, we have allocated the
4315
+ // buffer in host memory so no actions are needed here except for
4316
+ // synchronizing on incoming events. A host-to-host copy is done if a host
4317
+ // pointer had been supplied during buffer creation on integrated devices.
4314
4318
//
4315
4319
// TODO: for discrete, check if the input buffer is already allocated
4316
4320
// in shared memory and thus is accessible from the host as is.
4317
4321
// Can we get SYCL RT to predict/allocate in shared memory
4318
4322
// from the beginning?
4319
- //
4320
- // On integrated devices the buffer has been allocated in host memory.
4323
+
4324
+ // For pinned host memory and integrated devices the buffer has been
4325
+ // allocated in host memory.
4321
4326
if (Buffer->OnHost ) {
4322
4327
// Wait on incoming events before doing the copy
4323
4328
piEventsWait (NumEventsInWaitList, EventWaitList);
@@ -4417,7 +4422,8 @@ pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr,
4417
4422
(*Event)->CommandData =
4418
4423
(MemObj->OnHost ? nullptr : (MemObj->MapHostPtr ? nullptr : MappedPtr));
4419
4424
4420
- // On integrated devices the buffer is allocated in host memory.
4425
+ // For pinned host memory and integrated devices the buffer is allocated
4426
+ // in host memory.
4421
4427
if (MemObj->OnHost ) {
4422
4428
// Wait on incoming events before doing the copy
4423
4429
piEventsWait (NumEventsInWaitList, EventWaitList);
0 commit comments