@@ -2092,7 +2092,20 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
2092
2092
Context->Devices [0 ]->ZeDeviceProperties .flags &
2093
2093
ZE_DEVICE_PROPERTY_FLAG_INTEGRATED;
2094
2094
2095
- if (DeviceIsIntegrated) {
2095
+ // Having PI_MEM_FLAGS_HOST_PTR_ALLOC for buffer requires allocation of
2096
+ // pinned host memory which then becomes automatically accessible from
2097
+ // discrete devices through PCI. This property ensures that the memory
2098
+ // map/unmap operations are free of cost and the buffer is optimized for
2099
+ // frequent accesses from the host giving improved performance.
2100
+ // see:
2101
+ // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/UsePinnedMemoryProperty/UsePinnedMemoryPropery.adoc
2102
+ bool AllocHostPtr = Flags & PI_MEM_FLAGS_HOST_PTR_ALLOC;
2103
+
2104
+ if (AllocHostPtr) {
2105
+ PI_ASSERT (HostPtr == nullptr , PI_INVALID_VALUE);
2106
+ }
2107
+
2108
+ if (AllocHostPtr || DeviceIsIntegrated) {
2096
2109
ze_host_mem_alloc_desc_t ZeDesc = {};
2097
2110
ZeDesc.flags = 0 ;
2098
2111
@@ -2106,6 +2119,7 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
2106
2119
ZE_CALL (
2107
2120
zeMemAllocDevice (Context->ZeContext , &ZeDesc, Size, 1 , ZeDevice, &Ptr));
2108
2121
}
2122
+
2109
2123
if (HostPtr) {
2110
2124
if ((Flags & PI_MEM_FLAGS_HOST_PTR_USE) != 0 ||
2111
2125
(Flags & PI_MEM_FLAGS_HOST_PTR_COPY) != 0 ) {
@@ -2133,7 +2147,7 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
2133
2147
*RetMem = new _pi_buffer (
2134
2148
Context, pi_cast<char *>(Ptr) /* Level Zero Memory Handle */ ,
2135
2149
HostPtrOrNull, nullptr , 0 , 0 ,
2136
- DeviceIsIntegrated /* Flag indicating allocation in host memory */ );
2150
+ AllocHostPtr || DeviceIsIntegrated /* allocation in host memory */ );
2137
2151
} catch (const std::bad_alloc &) {
2138
2152
return PI_OUT_OF_HOST_MEMORY;
2139
2153
} catch (...) {
@@ -4272,8 +4286,8 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer,
4272
4286
void **RetMap) {
4273
4287
4274
4288
// TODO: we don't implement read-only or write-only, always read-write.
4275
- // assert((map_flags & CL_MAP_READ ) != 0);
4276
- // assert((map_flags & CL_MAP_WRITE ) != 0);
4289
+ // assert((map_flags & PI_MAP_READ ) != 0);
4290
+ // assert((map_flags & PI_MAP_WRITE ) != 0);
4277
4291
PI_ASSERT (Buffer, PI_INVALID_MEM_OBJECT);
4278
4292
PI_ASSERT (Queue, PI_INVALID_QUEUE);
4279
4293
@@ -4296,17 +4310,18 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer,
4296
4310
4297
4311
// TODO: Level Zero is missing the memory "mapping" capabilities, so we are
4298
4312
// left with doing a new memory allocation and a copy (read) on discrete devices.
4299
- // On integrated devices we have allocated the buffer in host memory
4300
- // so no actions are needed here except for synchronizing on incoming events
4301
- // and doing a host-to-host copy if a host pointer had been supplied
4302
- // during buffer creation.
4313
+ // For pinned host memory and integrated devices, we have allocated the
4314
+ // buffer in host memory so no actions are needed here except for
4315
+ // synchronizing on incoming events. A host-to-host copy is done if a host
4316
+ // pointer had been supplied during buffer creation on integrated devices.
4303
4317
//
4304
4318
// TODO: for discrete, check if the input buffer is already allocated
4305
4319
// in shared memory and thus is accessible from the host as is.
4306
4320
// Can we get SYCL RT to predict/allocate in shared memory
4307
4321
// from the beginning?
4308
- //
4309
- // On integrated devices the buffer has been allocated in host memory.
4322
+
4323
+ // For pinned host memory and integrated devices the buffer has been
4324
+ // allocated in host memory.
4310
4325
if (Buffer->OnHost ) {
4311
4326
// Wait on incoming events before doing the copy
4312
4327
piEventsWait (NumEventsInWaitList, EventWaitList);
@@ -4406,7 +4421,8 @@ pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr,
4406
4421
(*Event)->CommandData =
4407
4422
(MemObj->OnHost ? nullptr : (MemObj->MapHostPtr ? nullptr : MappedPtr));
4408
4423
4409
- // On integrated devices the buffer is allocated in host memory.
4424
+ // For pinned host memory and integrated devices the buffer is allocated
4425
+ // in host memory.
4410
4426
if (MemObj->OnHost ) {
4411
4427
// Wait on incoming events before doing the copy
4412
4428
piEventsWait (NumEventsInWaitList, EventWaitList);
0 commit comments