@@ -3322,21 +3322,9 @@ pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) {
3322
3322
ze_event_handle_t ZeEvent = EventList[I]->ZeEvent ;
3323
3323
zePrint (" ZeEvent = %lx\n " , pi_cast<std::uintptr_t >(ZeEvent));
3324
3324
3325
- // If event comes from a Map/UnMap operation in integrated device, then do
3326
- // sync, memcpy, and signaling on the host
3325
+ // If event comes from a Map/UnMap operation on an integrated device,
3326
+ // then nothing needs to be done here except signal the event.
3327
3327
if (EventList[I]->HostSyncforMap ) {
3328
- #if 0
3329
- for (auto ZeWaitEvent : EventList[I]->waitEvents) {
3330
- zePrint("ZeWaitEvent = %lx\n", pi_cast<std::uintptr_t>(ZeWaitEvent));
3331
- if (ZeWaitEvent)
3332
- ZE_CALL(zeEventHostSynchronize(ZeWaitEvent, UINT32_MAX));
3333
- }
3334
- if (EventList[I]->CopyPending) {
3335
- memcpy(EventList[I]->DstBuffer, EventList[I]->SrcBuffer,
3336
- EventList[I]->RetMapSize);
3337
- EventList[I]->CopyPending = false;
3338
- }
3339
- #endif
3340
3328
ZE_CALL (zeEventHostSignal (ZeEvent));
3341
3329
} else {
3342
3330
ZE_CALL (zeEventHostSynchronize (ZeEvent, UINT32_MAX));
@@ -4015,9 +4003,6 @@ piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer, pi_bool BlockingMap,
4015
4003
assert (Buffer);
4016
4004
assert (Queue);
4017
4005
4018
- // Lock automatically releases when this goes out of scope.
4019
- std::lock_guard<std::mutex> lock (Queue->PiQueueMutex );
4020
-
4021
4006
// Query the buffer allocation to determine if host allocation
4022
4007
ze_memory_allocation_properties_t ZeMemoryAllocationProperties = {};
4023
4008
ze_device_handle_t ZeDeviceHandle;
@@ -4058,51 +4043,23 @@ piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer, pi_bool BlockingMap,
4058
4043
// Can we get SYCL RT to predict/allocate in shared memory
4059
4044
// from the beginning?
4060
4045
if (BufferUsesHostMem) {
4061
- (*Event)->HostSyncforMap = true ;
4062
- #if 0
4063
- for (uint32_t i = 0; i < NumEventsInWaitList; i++) {
4064
- zePrint("Map added ZeWaitEvent = %lx\n",
4065
- pi_cast<std::uintptr_t>(EventWaitList[i]->ZeEvent));
4066
- (*Event)->waitEvents.push_back(EventWaitList[i]->ZeEvent);
4067
- }
4068
- #else
4069
- for (uint32_t i = 0 ; i < NumEventsInWaitList; i++) {
4070
- auto Queue = EventWaitList[i]->Queue ;
4071
- zePrint (" Got Q\n " );
4072
- if (Queue->RefCount > 0 ) {
4073
- zePrint (" Executing commandlist\n " );
4074
- if (auto Res = Queue->executeOpenCommandList ())
4075
- return Res;
4076
- }
4077
- }
4078
- for (uint32_t i = 0 ; i < NumEventsInWaitList; i++) {
4079
- zePrint (" Going to wait on ZeWaitEvent = %lx\n " ,
4080
- pi_cast<std::uintptr_t >(EventWaitList[i]->ZeEvent ));
4081
- auto ZeWaitEvent = EventWaitList[i]->ZeEvent ;
4082
- if (ZeWaitEvent)
4083
- ZE_CALL (zeEventHostSynchronize (ZeWaitEvent, UINT32_MAX));
4084
- }
4085
- #endif
4046
+ // Wait on incoming events before doing the copy
4047
+ piEventsWait (NumEventsInWaitList, EventWaitList);
4086
4048
if (Buffer->MapHostPtr ) {
4087
4049
*RetMap = Buffer->MapHostPtr + Offset;
4088
- #if 0
4089
- (*Event)->SrcBuffer = pi_cast<char*>(Buffer->getZeHandle()) + Offset;
4090
- (*Event)->DstBuffer = *RetMap;
4091
- (*Event)->RetMapSize = Size;
4092
- (*Event)->CopyPending = true;
4093
- #else
4094
- zePrint (" Doing memcpy %p %p %zu\n " , *RetMap,
4095
- pi_cast<char *>(Buffer->getZeHandle ()) + Offset, Size);
4096
4050
memcpy (*RetMap, pi_cast<char *>(Buffer->getZeHandle ()) + Offset, Size);
4097
- zePrint (" DONE\n " );
4098
- #endif
4099
4051
} else {
4100
4052
*RetMap = pi_cast<char *>(Buffer->getZeHandle ()) + Offset;
4101
4053
}
4054
+ // Mark this event as handled
4055
+ (*Event)->HostSyncforMap = true ;
4102
4056
4103
4057
return Buffer->addMapping (*RetMap, Offset, Size);
4104
4058
}
4105
4059
4060
+ // Lock automatically releases when this goes out of scope.
4061
+ std::lock_guard<std::mutex> lock (Queue->PiQueueMutex );
4062
+
4106
4063
// For discrete devices we need a command list
4107
4064
if (auto Res = Queue->Device ->getAvailableCommandList (Queue, &ZeCommandList,
4108
4065
&ZeFence))
@@ -4141,9 +4098,6 @@ pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr,
4141
4098
const pi_event *EventWaitList, pi_event *Event) {
4142
4099
assert (Queue);
4143
4100
4144
- // Lock automatically releases when this goes out of scope.
4145
- std::lock_guard<std::mutex> lock (Queue->PiQueueMutex );
4146
-
4147
4101
// Query the buffer allocation to determine if host allocation
4148
4102
ze_memory_allocation_properties_t ZeMemoryAllocationProperties = {};
4149
4103
ze_device_handle_t ZeDeviceHandle;
@@ -4194,57 +4148,47 @@ pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr,
4194
4148
4195
4149
if (BufferUsesHostMem) {
4196
4150
(*Event)->HostSyncforMap = true ;
4197
- #if 0
4198
- for (uint32_t i = 0; i < NumEventsInWaitList; i++) {
4199
- zePrint("UnMap Added ZeWaitEvent = %lx\n",
4200
- pi_cast<std::uintptr_t>(EventWaitList[i]->ZeEvent));
4201
- (*Event)->waitEvents.push_back(EventWaitList[i]->ZeEvent);
4202
- }
4203
- (*Event)->SrcBuffer = MappedPtr;
4204
- (*Event)->DstBuffer =
4205
- pi_cast<char*>(MemObj->getZeHandle()) + MapInfo.Offset;
4206
- (*Event)->RetMapSize = MapInfo.Size;
4207
- (*Event)->CopyPending = true;
4208
- #else
4209
- for (uint32_t i = 0 ; i < NumEventsInWaitList; i++) {
4210
- auto ZeWaitEvent = EventWaitList[i]->ZeEvent ;
4211
- if (ZeWaitEvent)
4212
- ZE_CALL (zeEventHostSynchronize (ZeWaitEvent, UINT32_MAX));
4213
- }
4151
+ // Wait on incoming events before doing the copy
4152
+ piEventsWait (NumEventsInWaitList, EventWaitList);
4214
4153
memcpy (pi_cast<char *>(MemObj->getZeHandle ()) + MapInfo.Offset , MappedPtr,
4215
4154
MapInfo.Size );
4216
- # endif
4217
- } else {
4155
+ // Mark this event as handled
4156
+ (*Event)-> HostSyncforMap = true ;
4218
4157
4219
- if (auto Res = Queue->Device ->getAvailableCommandList (Queue, &ZeCommandList,
4220
- &ZeFence))
4221
- return Res;
4158
+ return PI_SUCCESS;
4159
+ }
4222
4160
4223
- ze_event_handle_t *ZeEventWaitList =
4224
- _pi_event::createZeEventList (NumEventsInWaitList, EventWaitList );
4161
+ // Lock automatically releases when this goes out of scope.
4162
+ std::lock_guard<std::mutex> lock (Queue-> PiQueueMutex );
4225
4163
4226
- ZE_CALL (zeCommandListAppendWaitOnEvents (ZeCommandList, NumEventsInWaitList,
4227
- ZeEventWaitList));
4164
+ if (auto Res = Queue->Device ->getAvailableCommandList (Queue, &ZeCommandList,
4165
+ &ZeFence))
4166
+ return Res;
4228
4167
4229
- // TODO: Level Zero is missing the memory "mapping" capabilities, so we are
4230
- // left to doing copy (write back to the device).
4231
- // See https://gitlab.devtools.intel.com/one-api/level_zero/issues/293. //
4232
- // INTEL
4233
- //
4234
- // NOTE: Keep this in sync with the implementation of
4235
- // piEnqueueMemBufferMap/piEnqueueMemImageMap.
4168
+ ze_event_handle_t *ZeEventWaitList =
4169
+ _pi_event::createZeEventList (NumEventsInWaitList, EventWaitList);
4236
4170
4237
- ZE_CALL (zeCommandListAppendMemoryCopy (
4238
- ZeCommandList, pi_cast<char *>(MemObj->getZeHandle ()) + MapInfo.Offset ,
4239
- MappedPtr, MapInfo.Size , ZeEvent, 0 , nullptr ));
4171
+ ZE_CALL (zeCommandListAppendWaitOnEvents (ZeCommandList, NumEventsInWaitList,
4172
+ ZeEventWaitList));
4240
4173
4241
- // Execute command list asynchronously, as the event will be used
4242
- // to track down its completion.
4243
- if (auto Res = Queue->executeCommandList (ZeCommandList, ZeFence))
4244
- return Res;
4174
+ // TODO: Level Zero is missing the memory "mapping" capabilities, so we are
4175
+ // left with doing a copy (write back to the device).
4176
+ // See https://gitlab.devtools.intel.com/one-api/level_zero/issues/293. //
4177
+ // INTEL
4178
+ //
4179
+ // NOTE: Keep this in sync with the implementation of
4180
+ // piEnqueueMemBufferMap/piEnqueueMemImageMap.
4245
4181
4246
- _pi_event::deleteZeEventList (ZeEventWaitList);
4247
- }
4182
+ ZE_CALL (zeCommandListAppendMemoryCopy (
4183
+ ZeCommandList, pi_cast<char *>(MemObj->getZeHandle ()) + MapInfo.Offset ,
4184
+ MappedPtr, MapInfo.Size , ZeEvent, 0 , nullptr ));
4185
+
4186
+ // Execute command list asynchronously, as the event will be used
4187
+ // to track down its completion.
4188
+ if (auto Res = Queue->executeCommandList (ZeCommandList, ZeFence))
4189
+ return Res;
4190
+
4191
+ _pi_event::deleteZeEventList (ZeEventWaitList);
4248
4192
4249
4193
return PI_SUCCESS;
4250
4194
}
0 commit comments