Skip to content

Commit 200694b

Browse files
authored
[SYCL] Support profiling info for event returned by NOP barrier (intel#12949)
Currently if ext_oneapi_barrier without waitlist is submitted to the in-order queue that doesn't have the last command (empty queue) then we return default constructed event which doesn't have profiling info because it is not associated with any queue. Associate such event with the queue and record submission time which is equal to the start time and the end time for such event because it basically corresponds to NOP.
1 parent 2488da1 commit 200694b

File tree

7 files changed

+113
-34
lines changed

7 files changed

+113
-34
lines changed

sycl/source/detail/event_impl.cpp

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,11 +154,15 @@ event_impl::event_impl(sycl::detail::pi::PiEvent Event,
154154
}
155155
}
156156

157-
event_impl::event_impl(const QueueImplPtr &Queue)
158-
: MQueue{Queue},
159-
MIsProfilingEnabled{Queue->is_host() || Queue->MIsProfilingEnabled},
160-
MFallbackProfiling{MIsProfilingEnabled && Queue->isProfilingFallback()} {
157+
event_impl::event_impl(const QueueImplPtr &Queue) {
161158
this->setContextImpl(Queue->getContextImplPtr());
159+
this->associateWithQueue(Queue);
160+
}
161+
162+
void event_impl::associateWithQueue(const QueueImplPtr &Queue) {
163+
MQueue = Queue;
164+
MIsProfilingEnabled = Queue->is_host() || Queue->MIsProfilingEnabled;
165+
MFallbackProfiling = MIsProfilingEnabled && Queue->isProfilingFallback();
162166
if (Queue->is_host()) {
163167
MState.store(HES_NotComplete);
164168
if (Queue->has_property<property::queue::enable_profiling>()) {
@@ -284,6 +288,7 @@ template <>
284288
uint64_t
285289
event_impl::get_profiling_info<info::event_profiling::command_submit>() {
286290
checkProfilingPreconditions();
291+
287292
// The delay between the submission and the actual start of a CommandBuffer
288293
// can be short. Consequently, the submission time, which is based on
289294
// an estimated clock and not on the real device clock, may be ahead of the
@@ -312,6 +317,11 @@ template <>
312317
uint64_t
313318
event_impl::get_profiling_info<info::event_profiling::command_start>() {
314319
checkProfilingPreconditions();
320+
321+
// For nop command start time is equal to submission time.
322+
if (isNOP() && MSubmitTime)
323+
return MSubmitTime;
324+
315325
if (!MHostEvent) {
316326
if (MEvent) {
317327
auto StartTime =
@@ -339,6 +349,11 @@ event_impl::get_profiling_info<info::event_profiling::command_start>() {
339349
template <>
340350
uint64_t event_impl::get_profiling_info<info::event_profiling::command_end>() {
341351
checkProfilingPreconditions();
352+
353+
// For nop command end time is equal to submission time.
354+
if (isNOP() && MSubmitTime)
355+
return MSubmitTime;
356+
342357
if (!MHostEvent) {
343358
if (MEvent) {
344359
auto EndTime =

sycl/source/detail/event_impl.hpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,17 @@ class event_impl {
223223
MSubmittedQueue = SubmittedQueue;
224224
};
225225

226+
/// Associate event with provided queue.
227+
///
228+
/// @return
229+
void associateWithQueue(const QueueImplPtr &Queue);
230+
231+
/// Indicates if this event is not associated with any command and doesn't
232+
/// have native handle.
233+
///
234+
/// @return true if no associated command and no event handle.
235+
bool isNOP() { return !MCommand && !getHandleRef(); }
236+
226237
/// Calling this function queries the current device timestamp and sets it as
227238
/// submission time for the command associated with this event.
228239
void setSubmissionTime();
@@ -316,8 +327,8 @@ class event_impl {
316327
std::unique_ptr<HostProfilingInfo> MHostProfilingInfo;
317328
void *MCommand = nullptr;
318329
std::weak_ptr<queue_impl> MQueue;
319-
const bool MIsProfilingEnabled = false;
320-
const bool MFallbackProfiling = false;
330+
bool MIsProfilingEnabled = false;
331+
bool MFallbackProfiling = false;
321332

322333
std::weak_ptr<queue_impl> MWorkerQueue;
323334
std::weak_ptr<queue_impl> MSubmittedQueue;

sycl/source/detail/helpers.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@ getOrWaitEvents(std::vector<sycl::event> DepEvents, ContextImplPtr Context) {
3131
// throwaway events created with empty constructor will not have a context
3232
// (which is set lazily) calling getContextImpl() would set that
3333
// context, which we wish to avoid as it is expensive.
34-
if (!SyclEventImplPtr->isContextInitialized() &&
35-
!SyclEventImplPtr->is_host()) {
34+
if ((!SyclEventImplPtr->isContextInitialized() &&
35+
!SyclEventImplPtr->is_host()) ||
36+
SyclEventImplPtr->isNOP()) {
3637
continue;
3738
}
3839
// The fusion command and its event are associated with a non-host context,

sycl/source/detail/queue_impl.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -290,8 +290,10 @@ areEventsSafeForSchedulerBypass(const std::vector<sycl::event> &DepEvents,
290290
// Events that don't have an initialized context are throwaway events that
291291
// don't represent actual dependencies. Calling getContextImpl() would set
292292
// their context, which we wish to avoid as it is expensive.
293-
if (!SyclEventImplPtr->isContextInitialized() &&
294-
!SyclEventImplPtr->is_host()) {
293+
// NOP events also don't represent actual dependencies.
294+
if ((!SyclEventImplPtr->isContextInitialized() &&
295+
!SyclEventImplPtr->is_host()) ||
296+
SyclEventImplPtr->isNOP()) {
295297
return true;
296298
}
297299
if (SyclEventImplPtr->is_host()) {

sycl/source/detail/scheduler/commands.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -263,8 +263,9 @@ std::vector<sycl::detail::pi::PiEvent> Command::getPiEventsBlocking(
263263
// Throwaway events created with empty constructor will not have a context
264264
// (which is set lazily) calling getContextImpl() would set that
265265
// context, which we wish to avoid as it is expensive.
266-
// Skip host task also.
267-
if (!EventImpl->isContextInitialized() || EventImpl->is_host())
266+
// Skip host task and NOP events also.
267+
if (!EventImpl->isContextInitialized() || EventImpl->is_host() ||
268+
EventImpl->isNOP())
268269
continue;
269270
// In this path nullptr native event means that the command has not been
270271
// enqueued. It may happen if async enqueue in a host task is involved.

sycl/source/queue.cpp

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,34 @@ void queue::wait_and_throw_proxy(const detail::code_location &CodeLoc) {
205205
impl->wait_and_throw(CodeLoc);
206206
}
207207

208+
static event
209+
getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) {
210+
// The last command recorded in the graph is not tracked by the queue but by
211+
// the graph itself. We must therefore search for the last node/event in the
212+
// graph.
213+
if (auto Graph = QueueImpl->getCommandGraph()) {
214+
auto LastEvent =
215+
Graph->getEventForNode(Graph->getLastInorderNode(QueueImpl));
216+
return sycl::detail::createSyclObjFromImpl<event>(LastEvent);
217+
}
218+
auto LastEvent = QueueImpl->getLastEvent();
219+
if (QueueImpl->MDiscardEvents) {
220+
std::cout << "Discard event enabled" << std::endl;
221+
return LastEvent;
222+
}
223+
224+
auto LastEventImpl = detail::getSyclObjImpl(LastEvent);
225+
// If last event is default constructed event then we want to associate it
226+
// with the queue and record submission time if profiling is enabled. Such
227+
// event corresponds to NOP and its submit time is same as start time and
228+
// end time.
229+
if (!LastEventImpl->isContextInitialized()) {
230+
LastEventImpl->associateWithQueue(QueueImpl);
231+
LastEventImpl->setSubmissionTime();
232+
}
233+
return detail::createSyclObjFromImpl<event>(LastEventImpl);
234+
}
235+
208236
/// Prevents any commands submitted afterward to this queue from executing
209237
/// until all commands previously submitted to this queue have entered the
210238
/// complete state.
@@ -213,16 +241,8 @@ void queue::wait_and_throw_proxy(const detail::code_location &CodeLoc) {
213241
/// \return a SYCL event object, which corresponds to the queue the command
214242
/// group is being enqueued on.
215243
event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) {
216-
if (is_in_order()) {
217-
// The last command recorded in the graph is not tracked by the queue but by
218-
// the graph itself. We must therefore search for the last node/event in the
219-
// graph.
220-
if (auto Graph = impl->getCommandGraph()) {
221-
auto LastEvent = Graph->getEventForNode(Graph->getLastInorderNode(impl));
222-
return sycl::detail::createSyclObjFromImpl<event>(LastEvent);
223-
}
224-
return impl->getLastEvent();
225-
}
244+
if (is_in_order())
245+
return getBarrierEventForInorderQueueHelper(impl);
226246

227247
return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc);
228248
}
@@ -238,20 +258,13 @@ event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) {
238258
/// group is being enqueued on.
239259
event queue::ext_oneapi_submit_barrier(const std::vector<event> &WaitList,
240260
const detail::code_location &CodeLoc) {
241-
bool AllEventsEmpty = std::all_of(
261+
bool AllEventsEmptyOrNop = std::all_of(
242262
begin(WaitList), end(WaitList), [&](const event &Event) -> bool {
243-
return !detail::getSyclObjImpl(Event)->isContextInitialized();
263+
return !detail::getSyclObjImpl(Event)->isContextInitialized() ||
264+
detail::getSyclObjImpl(Event)->isNOP();
244265
});
245-
if (is_in_order() && AllEventsEmpty) {
246-
// The last command recorded in the graph is not tracked by the queue but by
247-
// the graph itself. We must therefore search for the last node/event in the
248-
// graph.
249-
if (auto Graph = impl->getCommandGraph()) {
250-
auto LastEvent = Graph->getEventForNode(Graph->getLastInorderNode(impl));
251-
return sycl::detail::createSyclObjFromImpl<event>(LastEvent);
252-
}
253-
return impl->getLastEvent();
254-
}
266+
if (is_in_order() && AllEventsEmptyOrNop)
267+
return getBarrierEventForInorderQueueHelper(impl);
255268

256269
return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); },
257270
CodeLoc);
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
4+
// Test to check that it is possible to get profiling info from the event
5+
// returned by barrier which turns into NOP.
6+
7+
#include <sycl/sycl.hpp>
8+
9+
int main() {
10+
sycl::event start;
11+
sycl::event stop;
12+
sycl::queue q{sycl::property_list(sycl::property::queue::in_order(),
13+
sycl::property::queue::enable_profiling())};
14+
float elapsed = 0;
15+
16+
start = q.ext_oneapi_submit_barrier();
17+
std::cout << "before parallel_for" << std::endl;
18+
q.parallel_for(
19+
sycl::nd_range<3>(sycl::range<3>(1, 1, 16) * sycl::range<3>(1, 1, 16),
20+
sycl::range<3>(1, 1, 16)),
21+
[=](sycl::nd_item<3> item_ct1) {
22+
double d = 123;
23+
for (int i = 0; i < 10000; i++) {
24+
d = d * i;
25+
}
26+
});
27+
std::cout << "after parallel_for" << std::endl;
28+
stop = q.ext_oneapi_submit_barrier();
29+
stop.wait_and_throw();
30+
elapsed =
31+
(stop.get_profiling_info<sycl::info::event_profiling::command_end>() -
32+
start.get_profiling_info<sycl::info::event_profiling::command_start>()) /
33+
1000000.0f;
34+
std::cout << "elapsed:" << elapsed << std::endl;
35+
return 0;
36+
}

0 commit comments

Comments
 (0)