Skip to content

Commit b1f1170

Browse files
committed
[L0] Fix cached and evicted timestamp recordings
This commit fixes two issues with the Level Zero implementation of timestamp recording events: * Events allocated for timestamp recordings may have been used previously, which could lead the implementation to treat the stale end timestamp from the earlier use as a valid result. The implementation now resets the end timestamp when a new recording is enqueued. * To avoid cases where timestamp recordings could conflict in the recordings buffer, unfinished recordings of dead events are now moved to a separate map, to be evicted fully on queue synchronization or death. Signed-off-by: Larsen, Steffen <[email protected]>
1 parent 2651b05 commit b1f1170

File tree

3 files changed

+31
-35
lines changed

3 files changed

+31
-35
lines changed

source/adapters/level_zero/event.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -574,8 +574,7 @@ ur_result_t urEventGetProfilingInfo(
574574

575575
// End time needs to be adjusted for resolution and valid bits.
576576
uint64_t ContextEndTime =
577-
(EndTimeRecording.RecordEventEndTimestamp & TimestampMaxValue) *
578-
ZeTimerResolution;
577+
(EndTimeRecording & TimestampMaxValue) * ZeTimerResolution;
579578

580579
// If the result is 0, we have not yet gotten results back and so we just
581580
// return it.
@@ -748,20 +747,20 @@ ur_result_t urEnqueueTimestampRecordingExp(
748747
ze_event_handle_t ZeEvent = (*OutEvent)->ZeEvent;
749748
(*OutEvent)->WaitList = TmpWaitList;
750749

750+
// Reset the end timestamp, in case it has been previously used.
751+
(*OutEvent)->RecordEventEndTimestamp = 0;
752+
751753
uint64_t DeviceStartTimestamp = 0;
752754
UR_CALL(ur::level_zero::urDeviceGetGlobalTimestamps(
753755
Device, &DeviceStartTimestamp, nullptr));
754756
(*OutEvent)->RecordEventStartTimestamp = DeviceStartTimestamp;
755757

756758
// Create a new entry in the queue's recordings.
757-
Queue->EndTimeRecordings[*OutEvent] =
758-
ur_queue_handle_t_::end_time_recording{};
759+
Queue->EndTimeRecordings[*OutEvent] = 0;
759760

760761
ZE2UR_CALL(zeCommandListAppendWriteGlobalTimestamp,
761-
(CommandList->first,
762-
&Queue->EndTimeRecordings[*OutEvent].RecordEventEndTimestamp,
763-
ZeEvent, (*OutEvent)->WaitList.Length,
764-
(*OutEvent)->WaitList.ZeEventList));
762+
(CommandList->first, &Queue->EndTimeRecordings[*OutEvent], ZeEvent,
763+
(*OutEvent)->WaitList.Length, (*OutEvent)->WaitList.ZeEventList));
765764

766765
UR_CALL(
767766
Queue->executeCommandList(CommandList, Blocking, false /* OkToBatch */));
@@ -1089,10 +1088,11 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) {
10891088
auto Entry = Queue->EndTimeRecordings.find(Event);
10901089
if (Entry != Queue->EndTimeRecordings.end()) {
10911090
auto &EndTimeRecording = Entry->second;
1092-
if (EndTimeRecording.RecordEventEndTimestamp == 0) {
1091+
if (EndTimeRecording == 0) {
10931092
// If the end time recording has not finished, we tell the queue that
10941093
// the event is no longer alive to avoid invalid write-backs.
1095-
EndTimeRecording.EventHasDied = true;
1094+
Queue->EvictedEndTimeRecordings.insert(
1095+
Queue->EndTimeRecordings.extract(Entry));
10961096
} else {
10971097
// Otherwise we evict the entry.
10981098
Queue->EndTimeRecordings.erase(Entry);

source/adapters/level_zero/queue.cpp

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1563,24 +1563,23 @@ void ur_queue_handle_t_::clearEndTimeRecordings() {
15631563
for (auto Entry : EndTimeRecordings) {
15641564
auto &Event = Entry.first;
15651565
auto &EndTimeRecording = Entry.second;
1566-
if (!Entry.second.EventHasDied) {
1567-
// Write the result back to the event if it is not dead.
1568-
uint64_t ContextEndTime =
1569-
(EndTimeRecording.RecordEventEndTimestamp & TimestampMaxValue) *
1570-
ZeTimerResolution;
1571-
1572-
// Handle a possible wrap-around (the underlying HW counter is < 64-bit).
1573-
// Note, it will not report correct time if there were multiple wrap
1574-
// arounds, and the longer term plan is to enlarge the capacity of the
1575-
// HW timestamps.
1576-
if (ContextEndTime < Event->RecordEventStartTimestamp)
1577-
ContextEndTime += TimestampMaxValue * ZeTimerResolution;
1578-
1579-
// Store it in the event.
1580-
Event->RecordEventEndTimestamp = ContextEndTime;
1581-
}
1566+
1567+
// Write the result back to the event. Recordings of dead events are not in
// this map; they were moved to EvictedEndTimeRecordings on event release.
1568+
uint64_t ContextEndTime =
1569+
(EndTimeRecording & TimestampMaxValue) * ZeTimerResolution;
1570+
1571+
// Handle a possible wrap-around (the underlying HW counter is < 64-bit).
1572+
// Note, it will not report correct time if there were multiple wrap
1573+
// arounds, and the longer term plan is to enlarge the capacity of the
1574+
// HW timestamps.
1575+
if (ContextEndTime < Event->RecordEventStartTimestamp)
1576+
ContextEndTime += TimestampMaxValue * ZeTimerResolution;
1577+
1578+
// Store it in the event.
1579+
Event->RecordEventEndTimestamp = ContextEndTime;
15821580
}
15831581
EndTimeRecordings.clear();
1582+
EvictedEndTimeRecordings.clear();
15841583
}
15851584

15861585
ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) {

source/adapters/level_zero/queue.hpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -490,15 +490,12 @@ struct ur_queue_handle_t_ : _ur_object {
490490
// End-times enqueued are stored on the queue rather than on the event to
491491
// avoid the event objects having been destroyed prior to the write to the
492492
// end-time member.
493-
struct end_time_recording {
494-
// RecordEventEndTimestamp is not adjusted for valid bits nor resolution, as
495-
// it is written asynchronously.
496-
uint64_t RecordEventEndTimestamp = 0;
497-
// The event may die before the recording has been written back. In this
498-
// case the event will mark this for deletion when the queue sees fit.
499-
bool EventHasDied = false;
500-
};
501-
std::map<ur_event_handle_t, end_time_recording> EndTimeRecordings;
493+
// The stored end timestamps are not adjusted for valid bits nor resolution,
494+
// as they are written asynchronously.
495+
std::map<ur_event_handle_t, uint64_t> EndTimeRecordings;
496+
// The event may die before the recording has been written back. In this case
497+
// we move it to a separate map to avoid conflicts.
498+
std::multimap<ur_event_handle_t, uint64_t> EvictedEndTimeRecordings;
502499

503500
// Clear the end time recording timestamps entries.
504501
void clearEndTimeRecordings();

0 commit comments

Comments
 (0)