Skip to content

Commit 997345d

Browse files
authored
Merge pull request #2410 from steffenlarsen/steffen/fix_profiling_tag_eviction_and_caching
[L0] Fix cached and evicted timestamp recordings
2 parents 81452ff + b1f1170 commit 997345d

File tree

3 files changed

+31
-35
lines changed

3 files changed

+31
-35
lines changed

source/adapters/level_zero/event.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -581,8 +581,7 @@ ur_result_t urEventGetProfilingInfo(
581581

582582
// End time needs to be adjusted for resolution and valid bits.
583583
uint64_t ContextEndTime =
584-
(EndTimeRecording.RecordEventEndTimestamp & TimestampMaxValue) *
585-
ZeTimerResolution;
584+
(EndTimeRecording & TimestampMaxValue) * ZeTimerResolution;
586585

587586
// If the result is 0, we have not yet gotten results back and so we just
588587
// return it.
@@ -755,20 +754,20 @@ ur_result_t urEnqueueTimestampRecordingExp(
755754
ze_event_handle_t ZeEvent = (*OutEvent)->ZeEvent;
756755
(*OutEvent)->WaitList = TmpWaitList;
757756

757+
// Reset the end timestamp, in case it has been previously used.
758+
(*OutEvent)->RecordEventEndTimestamp = 0;
759+
758760
uint64_t DeviceStartTimestamp = 0;
759761
UR_CALL(ur::level_zero::urDeviceGetGlobalTimestamps(
760762
Device, &DeviceStartTimestamp, nullptr));
761763
(*OutEvent)->RecordEventStartTimestamp = DeviceStartTimestamp;
762764

763765
// Create a new entry in the queue's recordings.
764-
Queue->EndTimeRecordings[*OutEvent] =
765-
ur_queue_handle_t_::end_time_recording{};
766+
Queue->EndTimeRecordings[*OutEvent] = 0;
766767

767768
ZE2UR_CALL(zeCommandListAppendWriteGlobalTimestamp,
768-
(CommandList->first,
769-
&Queue->EndTimeRecordings[*OutEvent].RecordEventEndTimestamp,
770-
ZeEvent, (*OutEvent)->WaitList.Length,
771-
(*OutEvent)->WaitList.ZeEventList));
769+
(CommandList->first, &Queue->EndTimeRecordings[*OutEvent], ZeEvent,
770+
(*OutEvent)->WaitList.Length, (*OutEvent)->WaitList.ZeEventList));
772771

773772
UR_CALL(
774773
Queue->executeCommandList(CommandList, Blocking, false /* OkToBatch */));
@@ -1096,10 +1095,11 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) {
10961095
auto Entry = Queue->EndTimeRecordings.find(Event);
10971096
if (Entry != Queue->EndTimeRecordings.end()) {
10981097
auto &EndTimeRecording = Entry->second;
1099-
if (EndTimeRecording.RecordEventEndTimestamp == 0) {
1098+
if (EndTimeRecording == 0) {
11001099
// If the end time recording has not finished, we tell the queue that
11011100
// the event is no longer alive to avoid invalid write-backs.
1102-
EndTimeRecording.EventHasDied = true;
1101+
Queue->EvictedEndTimeRecordings.insert(
1102+
Queue->EndTimeRecordings.extract(Entry));
11031103
} else {
11041104
// Otherwise the recording has finished, so we simply erase the entry.
11051105
Queue->EndTimeRecordings.erase(Entry);

source/adapters/level_zero/queue.cpp

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1566,24 +1566,23 @@ void ur_queue_handle_t_::clearEndTimeRecordings() {
15661566
for (auto Entry : EndTimeRecordings) {
15671567
auto &Event = Entry.first;
15681568
auto &EndTimeRecording = Entry.second;
1569-
if (!Entry.second.EventHasDied) {
1570-
// Write the result back to the event if it is not dead.
1571-
uint64_t ContextEndTime =
1572-
(EndTimeRecording.RecordEventEndTimestamp & TimestampMaxValue) *
1573-
ZeTimerResolution;
1574-
1575-
// Handle a possible wrap-around (the underlying HW counter is < 64-bit).
1576-
// Note, it will not report correct time if there were multiple wrap
1577-
// arounds, and the longer term plan is to enlarge the capacity of the
1578-
// HW timestamps.
1579-
if (ContextEndTime < Event->RecordEventStartTimestamp)
1580-
ContextEndTime += TimestampMaxValue * ZeTimerResolution;
1581-
1582-
// Store it in the event.
1583-
Event->RecordEventEndTimestamp = ContextEndTime;
1584-
}
1569+
1570+
// Write the result back to the event. Recordings whose event died before
// completion are moved to EvictedEndTimeRecordings and are not visited here.
1571+
uint64_t ContextEndTime =
1572+
(EndTimeRecording & TimestampMaxValue) * ZeTimerResolution;
1573+
1574+
// Handle a possible wrap-around (the underlying HW counter is < 64-bit).
1575+
// Note, it will not report correct time if there were multiple wrap
1576+
// arounds, and the longer term plan is to enlarge the capacity of the
1577+
// HW timestamps.
1578+
if (ContextEndTime < Event->RecordEventStartTimestamp)
1579+
ContextEndTime += TimestampMaxValue * ZeTimerResolution;
1580+
1581+
// Store it in the event.
1582+
Event->RecordEventEndTimestamp = ContextEndTime;
15851583
}
15861584
EndTimeRecordings.clear();
1585+
EvictedEndTimeRecordings.clear();
15871586
}
15881587

15891588
ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) {

source/adapters/level_zero/queue.hpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -492,15 +492,12 @@ struct ur_queue_handle_t_ : _ur_object {
492492
// End-times enqueued are stored on the queue rather than on the event to
493493
// avoid the event objects having been destroyed prior to the write to the
494494
// end-time member.
495-
struct end_time_recording {
496-
// RecordEventEndTimestamp is not adjusted for valid bits nor resolution, as
497-
// it is written asynchronously.
498-
uint64_t RecordEventEndTimestamp = 0;
499-
// The event may die before the recording has been written back. In this
500-
// case the event will mark this for deletion when the queue sees fit.
501-
bool EventHasDied = false;
502-
};
503-
std::map<ur_event_handle_t, end_time_recording> EndTimeRecordings;
495+
// The stored end timestamp is not adjusted for valid bits nor resolution, as
496+
// it is written asynchronously.
497+
std::map<ur_event_handle_t, uint64_t> EndTimeRecordings;
498+
// The event may die before the recording has been written back. In this case
499+
// we move it to a separate map to avoid conflicts.
500+
std::multimap<ur_event_handle_t, uint64_t> EvictedEndTimeRecordings;
504501

505502
// Clear the end time recording timestamps entries.
506503
void clearEndTimeRecordings();

0 commit comments

Comments
 (0)