@@ -15,8 +15,8 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
15
15
16
16
if (eventPool->isEventPoolTimestampFlagSet ()) {
17
17
event->setEventTimestampFlag (true );
18
- event->kernelTimestampsData = std::make_unique<KernelTimestampsData<TagSizeT>[]>(EventPacketsCount::maxKernelSplit);
19
18
}
19
+ event->kernelEventCompletionData = std::make_unique<KernelEventCompletionData<TagSizeT>[]>(EventPacketsCount::maxKernelSplit);
20
20
21
21
auto alloc = eventPool->getAllocation ().getGraphicsAllocation (device->getNEODevice ()->getRootDeviceIndex ());
22
22
@@ -49,24 +49,24 @@ NEO::GraphicsAllocation &EventImp<TagSizeT>::getAllocation(Device *device) {
49
49
50
50
template <typename TagSizeT>
51
51
ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
52
- globalStartTS = kernelTimestampsData [0 ].getGlobalStartValue (0 );
53
- globalEndTS = kernelTimestampsData [0 ].getGlobalEndValue (0 );
54
- contextStartTS = kernelTimestampsData [0 ].getContextStartValue (0 );
55
- contextEndTS = kernelTimestampsData [0 ].getContextEndValue (0 );
52
+ globalStartTS = kernelEventCompletionData [0 ].getGlobalStartValue (0 );
53
+ globalEndTS = kernelEventCompletionData [0 ].getGlobalEndValue (0 );
54
+ contextStartTS = kernelEventCompletionData [0 ].getContextStartValue (0 );
55
+ contextEndTS = kernelEventCompletionData [0 ].getContextEndValue (0 );
56
56
57
57
for (uint32_t i = 0 ; i < kernelCount; i++) {
58
- for (auto packetId = 0u ; packetId < kernelTimestampsData [i].getPacketsUsed (); packetId++) {
59
- if (globalStartTS > kernelTimestampsData [i].getGlobalStartValue (packetId)) {
60
- globalStartTS = kernelTimestampsData [i].getGlobalStartValue (packetId);
58
+ for (auto packetId = 0u ; packetId < kernelEventCompletionData [i].getPacketsUsed (); packetId++) {
59
+ if (globalStartTS > kernelEventCompletionData [i].getGlobalStartValue (packetId)) {
60
+ globalStartTS = kernelEventCompletionData [i].getGlobalStartValue (packetId);
61
61
}
62
- if (contextStartTS > kernelTimestampsData [i].getContextStartValue (packetId)) {
63
- contextStartTS = kernelTimestampsData [i].getContextStartValue (packetId);
62
+ if (contextStartTS > kernelEventCompletionData [i].getContextStartValue (packetId)) {
63
+ contextStartTS = kernelEventCompletionData [i].getContextStartValue (packetId);
64
64
}
65
- if (contextEndTS < kernelTimestampsData [i].getContextEndValue (packetId)) {
66
- contextEndTS = kernelTimestampsData [i].getContextEndValue (packetId);
65
+ if (contextEndTS < kernelEventCompletionData [i].getContextEndValue (packetId)) {
66
+ contextEndTS = kernelEventCompletionData [i].getContextEndValue (packetId);
67
67
}
68
- if (globalEndTS < kernelTimestampsData [i].getGlobalEndValue (packetId)) {
69
- globalEndTS = kernelTimestampsData [i].getGlobalEndValue (packetId);
68
+ if (globalEndTS < kernelEventCompletionData [i].getGlobalEndValue (packetId)) {
69
+ globalEndTS = kernelEventCompletionData [i].getGlobalEndValue (packetId);
70
70
}
71
71
}
72
72
}
@@ -75,23 +75,40 @@ ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
75
75
}
76
76
77
77
template <typename TagSizeT>
78
- void EventImp<TagSizeT>::assignTimestampData (void *address) {
78
+ void EventImp<TagSizeT>::assignKernelEventCompletionData (void *address) {
79
79
for (uint32_t i = 0 ; i < kernelCount; i++) {
80
- uint32_t packetsToCopy = kernelTimestampsData[i].getPacketsUsed ();
80
+ uint32_t packetsToCopy = 0 ;
81
+ packetsToCopy = kernelEventCompletionData[i].getPacketsUsed ();
81
82
for (uint32_t packetId = 0 ; packetId < packetsToCopy; packetId++) {
82
- kernelTimestampsData [i].assignDataToAllTimestamps (packetId, address);
83
+ kernelEventCompletionData [i].assignDataToAllTimestamps (packetId, address);
83
84
address = ptrOffset (address, NEO::TimestampPackets<TagSizeT>::getSinglePacketSize ());
84
85
}
85
86
}
86
87
}
87
88
88
89
template <typename TagSizeT>
89
90
ze_result_t EventImp<TagSizeT>::queryStatusKernelTimestamp() {
90
- assignTimestampData (hostAddress);
91
+ assignKernelEventCompletionData (hostAddress);
92
+ uint32_t queryVal = Event::STATE_CLEARED;
93
+ for (uint32_t i = 0 ; i < kernelCount; i++) {
94
+ uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed ();
95
+ for (uint32_t packetId = 0 ; packetId < packetsToCheck; packetId++) {
96
+ if (kernelEventCompletionData[i].getContextEndValue (packetId) == queryVal) {
97
+ return ZE_RESULT_NOT_READY;
98
+ }
99
+ }
100
+ }
101
+ return ZE_RESULT_SUCCESS;
102
+ }
103
+
104
+ template <typename TagSizeT>
105
+ ze_result_t EventImp<TagSizeT>::queryStatusNonTimestamp() {
106
+ assignKernelEventCompletionData (hostAddress);
107
+ uint32_t queryVal = Event::STATE_CLEARED;
91
108
for (uint32_t i = 0 ; i < kernelCount; i++) {
92
- uint32_t packetsToCheck = kernelTimestampsData [i].getPacketsUsed ();
109
+ uint32_t packetsToCheck = kernelEventCompletionData [i].getPacketsUsed ();
93
110
for (uint32_t packetId = 0 ; packetId < packetsToCheck; packetId++) {
94
- if (kernelTimestampsData [i].getContextEndValue (packetId) == Event::STATE_CLEARED ) {
111
+ if (kernelEventCompletionData [i].getContextStartValue (packetId) == queryVal ) {
95
112
return ZE_RESULT_NOT_READY;
96
113
}
97
114
}
@@ -102,17 +119,16 @@ ze_result_t EventImp<TagSizeT>::queryStatusKernelTimestamp() {
102
119
template <typename TagSizeT>
103
120
ze_result_t EventImp<TagSizeT>::queryStatus() {
104
121
uint64_t *hostAddr = static_cast <uint64_t *>(hostAddress);
105
- uint32_t queryVal = Event::STATE_CLEARED;
106
122
107
123
if (metricStreamer != nullptr ) {
108
124
*hostAddr = metricStreamer->getNotificationState ();
109
125
}
110
126
this ->csr ->downloadAllocations ();
111
127
if (isEventTimestampFlagSet ()) {
112
128
return queryStatusKernelTimestamp ();
129
+ } else {
130
+ return queryStatusNonTimestamp ();
113
131
}
114
- memcpy_s (static_cast <void *>(&queryVal), sizeof (uint32_t ), static_cast <void *>(hostAddr), sizeof (uint32_t ));
115
- return (queryVal == Event::STATE_CLEARED) ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS;
116
132
}
117
133
118
134
template <typename TagSizeT>
@@ -130,7 +146,7 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
130
146
}
131
147
};
132
148
for (uint32_t i = 0 ; i < kernelCount; i++) {
133
- uint32_t packetsToSet = kernelTimestampsData [i].getPacketsUsed ();
149
+ uint32_t packetsToSet = kernelEventCompletionData [i].getPacketsUsed ();
134
150
for (uint32_t j = 0 ; j < packetsToSet; j++) {
135
151
eventTsSetFunc (baseAddr + NEO::TimestampPackets<TagSizeT>::getContextStartOffset ());
136
152
eventTsSetFunc (baseAddr + NEO::TimestampPackets<TagSizeT>::getGlobalStartOffset ());
@@ -139,7 +155,7 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
139
155
baseAddr += NEO::TimestampPackets<TagSizeT>::getSinglePacketSize ();
140
156
}
141
157
}
142
- assignTimestampData (hostAddress);
158
+ assignKernelEventCompletionData (hostAddress);
143
159
144
160
return ZE_RESULT_SUCCESS;
145
161
}
@@ -208,14 +224,12 @@ ze_result_t EventImp<TagSizeT>::reset() {
208
224
if (isEventTimestampFlagSet ()) {
209
225
kernelCount = EventPacketsCount::maxKernelSplit;
210
226
for (uint32_t i = 0 ; i < kernelCount; i++) {
211
- kernelTimestampsData [i].setPacketsUsed (NEO::TimestampPacketSizeControl::preferredPacketCount);
227
+ kernelEventCompletionData [i].setPacketsUsed (NEO::TimestampPacketSizeControl::preferredPacketCount);
212
228
}
213
- hostEventSetValue (Event::STATE_INITIAL);
214
- resetPackets ();
215
- return ZE_RESULT_SUCCESS;
216
- } else {
217
- return hostEventSetValue (Event::STATE_INITIAL);
218
229
}
230
+ hostEventSetValue (Event::STATE_INITIAL);
231
+ resetPackets ();
232
+ return ZE_RESULT_SUCCESS;
219
233
}
220
234
221
235
template <typename TagSizeT>
@@ -227,7 +241,7 @@ ze_result_t EventImp<TagSizeT>::queryKernelTimestamp(ze_kernel_timestamp_result_
227
241
return ZE_RESULT_NOT_READY;
228
242
}
229
243
230
- assignTimestampData (hostAddress);
244
+ assignKernelEventCompletionData (hostAddress);
231
245
calculateProfilingData ();
232
246
233
247
auto eventTsSetFunc = [&](uint64_t &timestampFieldToCopy, uint64_t &timestampFieldForWriting) {
@@ -288,10 +302,10 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
288
302
packetId = static_cast <NEO::SubDevice *>(deviceImp->neoDevice )->getSubDeviceIndex ();
289
303
}
290
304
291
- globalStartTs = kernelTimestampsData [timestampPacket].getGlobalStartValue (packetId);
292
- contextStartTs = kernelTimestampsData [timestampPacket].getContextStartValue (packetId);
293
- contextEndTs = kernelTimestampsData [timestampPacket].getContextEndValue (packetId);
294
- globalEndTs = kernelTimestampsData [timestampPacket].getGlobalEndValue (packetId);
305
+ globalStartTs = kernelEventCompletionData [timestampPacket].getGlobalStartValue (packetId);
306
+ contextStartTs = kernelEventCompletionData [timestampPacket].getContextStartValue (packetId);
307
+ contextEndTs = kernelEventCompletionData [timestampPacket].getContextEndValue (packetId);
308
+ globalEndTs = kernelEventCompletionData [timestampPacket].getGlobalEndValue (packetId);
295
309
296
310
queryTsEventAssignFunc (result.global .kernelStart , globalStartTs);
297
311
queryTsEventAssignFunc (result.context .kernelStart , contextStartTs);
@@ -305,37 +319,31 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
305
319
template <typename TagSizeT>
306
320
void EventImp<TagSizeT>::resetPackets() {
307
321
for (uint32_t i = 0 ; i < kernelCount; i++) {
308
- kernelTimestampsData [i].setPacketsUsed (1 );
322
+ kernelEventCompletionData [i].setPacketsUsed (1 );
309
323
}
310
324
kernelCount = 1 ;
311
325
}
312
326
313
327
template <typename TagSizeT>
314
328
uint32_t EventImp<TagSizeT>::getPacketsInUse() {
315
- if (isEventTimestampFlagSet ()) {
316
- uint32_t packetsInUse = 0 ;
317
- for (uint32_t i = 0 ; i < kernelCount; i++) {
318
- packetsInUse += kernelTimestampsData[i].getPacketsUsed ();
319
- };
320
- return packetsInUse;
321
- } else {
322
- return 1 ;
329
+ uint32_t packetsInUse = 0 ;
330
+ for (uint32_t i = 0 ; i < kernelCount; i++) {
331
+ packetsInUse += kernelEventCompletionData[i].getPacketsUsed ();
323
332
}
333
+ return packetsInUse;
324
334
}
325
335
326
336
template <typename TagSizeT>
327
337
void EventImp<TagSizeT>::setPacketsInUse(uint32_t value) {
328
- kernelTimestampsData [getCurrKernelDataIndex ()].setPacketsUsed (value);
329
- };
338
+ kernelEventCompletionData [getCurrKernelDataIndex ()].setPacketsUsed (value);
339
+ }
330
340
331
341
template <typename TagSizeT>
332
342
uint64_t EventImp<TagSizeT>::getPacketAddress(Device *device) {
333
343
uint64_t address = getGpuAddress (device);
334
- if (isEventTimestampFlagSet () && kernelCount > 1 ) {
335
- for (uint32_t i = 0 ; i < kernelCount - 1 ; i++) {
336
- address += kernelTimestampsData[i].getPacketsUsed () *
337
- NEO::TimestampPackets<TagSizeT>::getSinglePacketSize ();
338
- }
344
+ for (uint32_t i = 0 ; i < kernelCount - 1 ; i++) {
345
+ address += kernelEventCompletionData[i].getPacketsUsed () *
346
+ NEO::TimestampPackets<TagSizeT>::getSinglePacketSize ();
339
347
}
340
348
return address;
341
349
}
0 commit comments