Skip to content

Commit 9bf0e20

Browse files
[SYCL][Level-Zero] Fixes to ZE_CALL tracing and introduction of internal PI_CALL tracing. (#3163)
* [SYCL] Fixes to ZE_CALL tracing and introduction of PI_CALL tracing for debugging purposes. It also improves error handling by not silently ignoring errors occurring in internally made PI calls. Signed-off-by: Sergey V Maslov <[email protected]>
1 parent 83f7815 commit 9bf0e20

File tree

2 files changed

+84
-75
lines changed

2 files changed

+84
-75
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

100755100644
Lines changed: 82 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,53 @@ std::mutex ZeCall::GlobalLock;
7373
// Controls Level Zero calls tracing in zePrint.
7474
static bool ZeDebug = false;
7575

76+
// Map Level Zero runtime error code to PI error code
77+
static pi_result mapError(ze_result_t ZeResult) {
78+
// TODO: these mappings need to be clarified and synced with the PI API return
79+
// values, which is TBD.
80+
static std::unordered_map<ze_result_t, pi_result> ErrorMapping = {
81+
{ZE_RESULT_SUCCESS, PI_SUCCESS},
82+
{ZE_RESULT_ERROR_DEVICE_LOST, PI_DEVICE_NOT_FOUND},
83+
{ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, PI_INVALID_OPERATION},
84+
{ZE_RESULT_ERROR_NOT_AVAILABLE, PI_INVALID_OPERATION},
85+
{ZE_RESULT_ERROR_UNINITIALIZED, PI_INVALID_PLATFORM},
86+
{ZE_RESULT_ERROR_INVALID_ARGUMENT, PI_INVALID_VALUE},
87+
{ZE_RESULT_ERROR_INVALID_NULL_POINTER, PI_INVALID_VALUE},
88+
{ZE_RESULT_ERROR_INVALID_SIZE, PI_INVALID_VALUE},
89+
{ZE_RESULT_ERROR_UNSUPPORTED_SIZE, PI_INVALID_VALUE},
90+
{ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT, PI_INVALID_VALUE},
91+
{ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT, PI_INVALID_EVENT},
92+
{ZE_RESULT_ERROR_INVALID_ENUMERATION, PI_INVALID_VALUE},
93+
{ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, PI_INVALID_VALUE},
94+
{ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT, PI_INVALID_VALUE},
95+
{ZE_RESULT_ERROR_INVALID_NATIVE_BINARY, PI_INVALID_BINARY},
96+
{ZE_RESULT_ERROR_INVALID_KERNEL_NAME, PI_INVALID_KERNEL_NAME},
97+
{ZE_RESULT_ERROR_INVALID_FUNCTION_NAME, PI_BUILD_PROGRAM_FAILURE},
98+
{ZE_RESULT_ERROR_OVERLAPPING_REGIONS, PI_INVALID_OPERATION},
99+
{ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION,
100+
PI_INVALID_WORK_GROUP_SIZE},
101+
{ZE_RESULT_ERROR_MODULE_BUILD_FAILURE, PI_BUILD_PROGRAM_FAILURE}};
102+
103+
auto It = ErrorMapping.find(ZeResult);
104+
if (It == ErrorMapping.end()) {
105+
return PI_ERROR_UNKNOWN;
106+
}
107+
return It->second;
108+
}
109+
110+
// Trace a call to Level-Zero RT
111+
#define ZE_CALL(Call) \
112+
if (auto Result = ZeCall().doCall(Call, #Call, true)) \
113+
return mapError(Result);
114+
#define ZE_CALL_NOCHECK(Call) ZeCall().doCall(Call, #Call, false)
115+
116+
// Trace an internal PI call; returns in case of an error.
117+
#define PI_CALL(Call) \
118+
fprintf(stderr, "PI ---> %s\n", #Call); \
119+
pi_result Result = (Call); \
120+
if (Result != PI_SUCCESS) \
121+
return Result;
122+
76123
// Controls Level Zero validation layer and parameter validation.
77124
static bool ZeValidationLayer = false;
78125

@@ -212,7 +259,7 @@ pi_result _pi_mem::removeMapping(void *MappedTo, Mapping &MapInfo) {
212259
return PI_SUCCESS;
213260
}
214261

215-
ze_result_t
262+
pi_result
216263
_pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &ZePool,
217264
size_t &Index) {
218265
// Maximum number of events that can be present in an event ZePool is captured
@@ -227,7 +274,7 @@ _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &ZePool,
227274
if (MaxNumEventsPerPool == 0) {
228275
zePrint("Zero size can't be specified in the "
229276
"ZE_MAX_NUMBER_OF_EVENTS_PER_EVENT_POOL\n");
230-
return ZE_RESULT_ERROR_INVALID_SIZE;
277+
return PI_INVALID_VALUE;
231278
}
232279

233280
Index = 0;
@@ -260,10 +307,8 @@ _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &ZePool,
260307
std::for_each(Devices.begin(), Devices.end(),
261308
[&](pi_device &D) { ZeDevices.push_back(D->ZeDevice); });
262309

263-
if (ze_result_t ZeRes =
264-
zeEventPoolCreate(ZeContext, &ZeEventPoolDesc, ZeDevices.size(),
265-
&ZeDevices[0], &ZeEventPool))
266-
return ZeRes;
310+
ZE_CALL(zeEventPoolCreate(ZeContext, &ZeEventPoolDesc, ZeDevices.size(),
311+
&ZeDevices[0], &ZeEventPool));
267312
NumEventsAvailableInEventPool[ZeEventPool] = MaxNumEventsPerPool - 1;
268313
NumEventsLiveInEventPool[ZeEventPool] = MaxNumEventsPerPool;
269314
} else {
@@ -273,57 +318,24 @@ _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &ZePool,
273318
--NumEventsAvailableInEventPool[ZeEventPool];
274319
}
275320
ZePool = ZeEventPool;
276-
return ZE_RESULT_SUCCESS;
321+
return PI_SUCCESS;
277322
}
278323

279-
ze_result_t
324+
pi_result
280325
_pi_context::decrementAliveEventsInPool(ze_event_pool_handle_t ZePool) {
281326
std::lock_guard<std::mutex> Lock(NumEventsLiveInEventPoolMutex);
282327
--NumEventsLiveInEventPool[ZePool];
283328
if (NumEventsLiveInEventPool[ZePool] == 0) {
284-
return zeEventPoolDestroy(ZePool);
329+
ZE_CALL(zeEventPoolDestroy(ZePool));
285330
}
286-
return ZE_RESULT_SUCCESS;
331+
return PI_SUCCESS;
287332
}
288333

289334
// Some opencl extensions we know are supported by all Level Zero devices.
290335
constexpr char ZE_SUPPORTED_EXTENSIONS[] =
291336
"cl_khr_il_program cl_khr_subgroups cl_intel_subgroups "
292337
"cl_intel_subgroups_short cl_intel_required_subgroup_size ";
293338

294-
// Map Level Zero runtime error code to PI error code
295-
static pi_result mapError(ze_result_t ZeResult) {
296-
// TODO: these mappings need to be clarified and synced with the PI API return
297-
// values, which is TBD.
298-
static std::unordered_map<ze_result_t, pi_result> ErrorMapping = {
299-
{ZE_RESULT_SUCCESS, PI_SUCCESS},
300-
{ZE_RESULT_ERROR_DEVICE_LOST, PI_DEVICE_NOT_FOUND},
301-
{ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, PI_INVALID_OPERATION},
302-
{ZE_RESULT_ERROR_NOT_AVAILABLE, PI_INVALID_OPERATION},
303-
{ZE_RESULT_ERROR_UNINITIALIZED, PI_INVALID_PLATFORM},
304-
{ZE_RESULT_ERROR_INVALID_ARGUMENT, PI_INVALID_VALUE},
305-
{ZE_RESULT_ERROR_INVALID_NULL_POINTER, PI_INVALID_VALUE},
306-
{ZE_RESULT_ERROR_INVALID_SIZE, PI_INVALID_VALUE},
307-
{ZE_RESULT_ERROR_UNSUPPORTED_SIZE, PI_INVALID_VALUE},
308-
{ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT, PI_INVALID_VALUE},
309-
{ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT, PI_INVALID_EVENT},
310-
{ZE_RESULT_ERROR_INVALID_ENUMERATION, PI_INVALID_VALUE},
311-
{ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, PI_INVALID_VALUE},
312-
{ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT, PI_INVALID_VALUE},
313-
{ZE_RESULT_ERROR_INVALID_NATIVE_BINARY, PI_INVALID_BINARY},
314-
{ZE_RESULT_ERROR_INVALID_KERNEL_NAME, PI_INVALID_KERNEL_NAME},
315-
{ZE_RESULT_ERROR_INVALID_FUNCTION_NAME, PI_BUILD_PROGRAM_FAILURE},
316-
{ZE_RESULT_ERROR_OVERLAPPING_REGIONS, PI_INVALID_OPERATION},
317-
{ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION,
318-
PI_INVALID_WORK_GROUP_SIZE},
319-
{ZE_RESULT_ERROR_MODULE_BUILD_FAILURE, PI_BUILD_PROGRAM_FAILURE}};
320-
auto It = ErrorMapping.find(ZeResult);
321-
if (It == ErrorMapping.end()) {
322-
return PI_ERROR_UNKNOWN;
323-
}
324-
return It->second;
325-
}
326-
327339
// Forward declarations
328340
static pi_result
329341
enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst,
@@ -402,10 +414,6 @@ ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *CallStr,
402414
#define PI_ASSERT(condition, error) \
403415
if (!(condition)) \
404416
return error;
405-
#define ZE_CALL(Call) \
406-
if (auto Result = ZeCall().doCall(Call, #Call, true)) \
407-
return mapError(Result);
408-
#define ZE_CALL_NOCHECK(Call) ZeCall().doCall(Call, #Call, false)
409417

410418
// Destroy all the command lists associated with this device.
411419
// This is required when destructing the _pi_device object.
@@ -417,7 +425,7 @@ _pi_device::~_pi_device() {
417425
std::lock_guard<std::mutex> Lock(ZeCommandListCacheMutex);
418426
for (ze_command_list_handle_t &ZeCommandList : ZeCommandListCache) {
419427
if (ZeCommandList)
420-
zeCommandListDestroy(ZeCommandList);
428+
ZE_CALL_NOCHECK(zeCommandListDestroy(ZeCommandList));
421429
}
422430
}
423431

@@ -510,7 +518,7 @@ pi_result _pi_context::finalize() {
510518
std::lock_guard<std::mutex> NumEventsLiveInEventPoolGuard(
511519
NumEventsLiveInEventPoolMutex);
512520
if (ZeEventPool && NumEventsLiveInEventPool[ZeEventPool])
513-
zeEventPoolDestroy(ZeEventPool);
521+
ZE_CALL(zeEventPoolDestroy(ZeEventPool));
514522

515523
// Destroy the command list used for initializations
516524
ZE_CALL(zeCommandListDestroy(ZeCommandListInit));
@@ -811,7 +819,7 @@ pi_result _pi_ze_event_list_t::createAndRetainPiZeEventList(
811819
}
812820

813821
for (pi_uint32 I = 0; I < this->Length; I++) {
814-
piEventRetain(this->PiEventList[I]);
822+
PI_CALL(piEventRetain(this->PiEventList[I]));
815823
}
816824
}
817825

@@ -1137,10 +1145,7 @@ pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle,
11371145

11381146
if (NumPlatforms) {
11391147
std::vector<pi_platform> Platforms(NumPlatforms);
1140-
Res = piPlatformsGet(NumPlatforms, Platforms.data(), nullptr);
1141-
if (Res != PI_SUCCESS) {
1142-
return Res;
1143-
}
1148+
PI_CALL(piPlatformsGet(NumPlatforms, Platforms.data(), nullptr));
11441149

11451150
// The SYCL spec requires that the set of platforms must remain fixed for
11461151
// the duration of the application's execution. We assume that we found all
@@ -2829,7 +2834,7 @@ pi_result piProgramLink(pi_context Context, pi_uint32 NumDevices,
28292834
// This module imports symbols, but it isn't currently linked with
28302835
// any other module. Grab the flag to indicate that it is now
28312836
// linked.
2832-
piProgramRetain(Input);
2837+
PI_CALL(piProgramRetain(Input));
28332838
Input->HasImportsAndIsLinked = true;
28342839
} else {
28352840
// This module imports symbols and is also linked with another module
@@ -2848,7 +2853,7 @@ pi_result piProgramLink(pi_context Context, pi_uint32 NumDevices,
28482853
Input->HasImportsAndIsLinked = true;
28492854
}
28502855
} else {
2851-
piProgramRetain(Input);
2856+
PI_CALL(piProgramRetain(Input));
28522857
}
28532858
Inputs.emplace_back(Input);
28542859
ZeHandles.push_back(Input->ZeModule);
@@ -3266,7 +3271,7 @@ pi_result piKernelCreate(pi_program Program, const char *KernelName,
32663271
}
32673272

32683273
// Update the refcount of the program to show its use by this kernel.
3269-
piProgramRetain(Program);
3274+
PI_CALL(piProgramRetain(Program));
32703275

32713276
return PI_SUCCESS;
32723277
}
@@ -3474,7 +3479,7 @@ pi_result piKernelRetain(pi_kernel Kernel) {
34743479

34753480
++(Kernel->RefCount);
34763481
// When retaining a kernel, you are also retaining the program it is part of.
3477-
piProgramRetain(Kernel->Program);
3482+
PI_CALL(piProgramRetain(Kernel->Program));
34783483
return PI_SUCCESS;
34793484
}
34803485

@@ -3485,12 +3490,12 @@ pi_result piKernelRelease(pi_kernel Kernel) {
34853490
auto KernelProgram = Kernel->Program;
34863491

34873492
if (--(Kernel->RefCount) == 0) {
3488-
zeKernelDestroy(Kernel->ZeKernel);
3493+
ZE_CALL(zeKernelDestroy(Kernel->ZeKernel));
34893494
delete Kernel;
34903495
}
34913496

34923497
// do a release on the program this kernel was part of
3493-
piProgramRelease(KernelProgram);
3498+
PI_CALL(piProgramRelease(KernelProgram));
34943499

34953500
return PI_SUCCESS;
34963501
}
@@ -3610,7 +3615,7 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
36103615
// code in cleanupAfterEvent will do a piKernelRelease to update
36113616
// the reference count on the kernel, using the kernel saved
36123617
// in CommandData.
3613-
piKernelRetain(Kernel);
3618+
PI_CALL(piKernelRetain(Kernel));
36143619

36153620
// Add the command to the command list
36163621
ZE_CALL(zeCommandListAppendLaunchKernel(
@@ -3636,7 +3641,9 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
36363641
pi_result piEventCreate(pi_context Context, pi_event *RetEvent) {
36373642
size_t Index = 0;
36383643
ze_event_pool_handle_t ZeEventPool = {};
3639-
ZE_CALL(Context->getFreeSlotInExistingOrNewPool(ZeEventPool, Index));
3644+
if (auto Res = Context->getFreeSlotInExistingOrNewPool(ZeEventPool, Index))
3645+
return Res;
3646+
36403647
ze_event_handle_t ZeEvent;
36413648
ze_event_desc_t ZeEventDesc = {};
36423649
// We have to set the SIGNAL & WAIT flags as HOST scope because the
@@ -3714,15 +3721,15 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName,
37143721

37153722
switch (ParamName) {
37163723
case PI_PROFILING_INFO_COMMAND_START: {
3717-
zeEventQueryKernelTimestamp(Event->ZeEvent, &tsResult);
3724+
ZE_CALL(zeEventQueryKernelTimestamp(Event->ZeEvent, &tsResult));
37183725

37193726
uint64_t ContextStartTime = tsResult.context.kernelStart;
37203727
ContextStartTime *= ZeTimerResolution;
37213728

37223729
return ReturnValue(uint64_t{ContextStartTime});
37233730
}
37243731
case PI_PROFILING_INFO_COMMAND_END: {
3725-
zeEventQueryKernelTimestamp(Event->ZeEvent, &tsResult);
3732+
ZE_CALL(zeEventQueryKernelTimestamp(Event->ZeEvent, &tsResult));
37263733

37273734
uint64_t ContextStartTime = tsResult.context.kernelStart;
37283735
uint64_t ContextEndTime = tsResult.context.kernelEnd;
@@ -3757,7 +3764,7 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName,
37573764
// Perform any necessary cleanup after an event has been signalled.
37583765
// This currently recycles the associated command list, and also makes
37593766
// sure to release any kernel that may have been used by the event.
3760-
static void cleanupAfterEvent(pi_event Event) {
3767+
static pi_result cleanupAfterEvent(pi_event Event) {
37613768
// The implementation of this is slightly tricky. The same event
37623769
// can be referred to by multiple threads, so it is possible to
37633770
// have a race condition between the read of fields of the event,
@@ -3794,7 +3801,7 @@ static void cleanupAfterEvent(pi_event Event) {
37943801
// Release the kernel associated with this event if there is one.
37953802
if (Event->CommandType == PI_COMMAND_TYPE_NDRANGE_KERNEL &&
37963803
Event->CommandData) {
3797-
piKernelRelease(pi_cast<pi_kernel>(Event->CommandData));
3804+
PI_CALL(piKernelRelease(pi_cast<pi_kernel>(Event->CommandData)));
37983805
Event->CommandData = nullptr;
37993806
}
38003807
}
@@ -3818,8 +3825,9 @@ static void cleanupAfterEvent(pi_event Event) {
38183825

38193826
DepEvent->WaitList.collectEventsForReleaseAndDestroyPiZeEventList(
38203827
EventsToBeReleased);
3821-
piEventRelease(DepEvent);
3828+
PI_CALL(piEventRelease(DepEvent));
38223829
}
3830+
return PI_SUCCESS;
38233831
}
38243832

38253833
pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) {
@@ -3893,13 +3901,14 @@ pi_result piEventRelease(pi_event Event) {
38933901
ZE_CALL(zeEventDestroy(Event->ZeEvent));
38943902

38953903
auto Context = Event->Context;
3896-
ZE_CALL(Context->decrementAliveEventsInPool(Event->ZeEventPool));
3904+
if (auto Res = Context->decrementAliveEventsInPool(Event->ZeEventPool))
3905+
return Res;
38973906

38983907
// We intentionally incremented the reference counter when an event is
38993908
// created so that we can prevent pi_queue from being released before the associated
39003909
// pi_event is released. Here we have to decrement it so pi_queue
39013910
// can be released successfully.
3902-
piQueueRelease(Event->Queue);
3911+
PI_CALL(piQueueRelease(Event->Queue));
39033912
delete Event;
39043913
}
39053914
return PI_SUCCESS;
@@ -4510,7 +4519,7 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer,
45104519
// allocated in host memory.
45114520
if (Buffer->OnHost) {
45124521
// Wait on incoming events before doing the copy
4513-
piEventsWait(NumEventsInWaitList, EventWaitList);
4522+
PI_CALL(piEventsWait(NumEventsInWaitList, EventWaitList));
45144523
if (Buffer->MapHostPtr) {
45154524
*RetMap = Buffer->MapHostPtr + Offset;
45164525
if (!(MapFlags & PI_MAP_WRITE_INVALIDATE_REGION))
@@ -4615,7 +4624,7 @@ pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr,
46154624
// in host memory.
46164625
if (MemObj->OnHost) {
46174626
// Wait on incoming events before doing the copy
4618-
piEventsWait(NumEventsInWaitList, EventWaitList);
4627+
PI_CALL(piEventsWait(NumEventsInWaitList, EventWaitList));
46194628
if (MemObj->MapHostPtr)
46204629
memcpy(pi_cast<char *>(MemObj->getZeHandle()) + MapInfo.Offset, MappedPtr,
46214630
MapInfo.Size);
@@ -5268,7 +5277,8 @@ pi_result piextUSMFree(pi_context Context, void *Ptr) {
52685277
pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex,
52695278
size_t ArgSize, const void *ArgValue) {
52705279

5271-
return piKernelSetArg(Kernel, ArgIndex, ArgSize, ArgValue);
5280+
PI_CALL(piKernelSetArg(Kernel, ArgIndex, ArgSize, ArgValue));
5281+
return PI_SUCCESS;
52725282
}
52735283

52745284
/// USM Memset API

sycl/plugins/level_zero/pi_level_zero.hpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -239,12 +239,11 @@ struct _pi_context : _pi_object {
239239

240240
// Get index of the free slot in the available pool. If there is no available
241241
// pool then create new one.
242-
ze_result_t getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &,
243-
size_t &);
242+
pi_result getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &);
244243

245244
// If event is destroyed then decrement number of events living in the pool
246245
// and destroy the pool if there are no alive events.
247-
ze_result_t decrementAliveEventsInPool(ze_event_pool_handle_t pool);
246+
pi_result decrementAliveEventsInPool(ze_event_pool_handle_t pool);
248247

249248
// Store USM allocator context(internal allocator structures)
250249
// for USM shared/host and device allocations. There is 1 allocator context

0 commit comments

Comments
 (0)