Skip to content

Commit c60a1fc

Browse files
authored
[SYCL] Make host task timestamps share the same base as device tasks (#18710)
Currently, we use `std::chrono` to record `command_submit`, `command_start` and `command_end` timestamps for host tasks. So, if there there is a mix of device tasks and host tasks submitted to the queue, they have different time base and inconvenient to use/compare. This PR makes host task timestamps to have the same time base as device tasks.
1 parent 89287ba commit c60a1fc

File tree

5 files changed

+93
-34
lines changed

5 files changed

+93
-34
lines changed

sycl/include/sycl/detail/host_profiling_info.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,19 @@ namespace sycl {
1616
inline namespace _V1 {
1717
namespace detail {
1818

19+
class device_impl;
20+
1921
/// Profiling info for the host execution.
2022
class __SYCL_EXPORT HostProfilingInfo {
2123
uint64_t StartTime = 0;
2224
uint64_t EndTime = 0;
25+
device_impl *Device = nullptr;
2326

2427
public:
28+
// Sets the device associated with a queue that was used to submit the host
29+
// task.
30+
void setDevice(device_impl *Dev) { Device = Dev; }
31+
2532
/// Returns event's start time.
2633
///
2734
/// \return event's start time in nanoseconds.

sycl/source/detail/event_impl.cpp

Lines changed: 35 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,25 @@ void event_impl::setComplete() {
113113
assert(false && "setComplete is not supported for non-host event");
114114
}
115115

116-
static uint64_t inline getTimestamp() {
117-
auto Timestamp = std::chrono::high_resolution_clock::now().time_since_epoch();
118-
return std::chrono::duration_cast<std::chrono::nanoseconds>(Timestamp)
119-
.count();
116+
static uint64_t inline getTimestamp(device_impl *Device) {
117+
if (Device) {
118+
try {
119+
return Device->getCurrentDeviceTime();
120+
} catch (sycl::exception &e) {
121+
if (e.code() == sycl::errc::feature_not_supported)
122+
throw sycl::exception(
123+
make_error_code(errc::profiling),
124+
std::string("Unable to get command group submission time: ") +
125+
e.what());
126+
std::rethrow_exception(std::current_exception());
127+
}
128+
} else {
129+
// Returning host time
130+
using namespace std::chrono;
131+
return duration_cast<nanoseconds>(
132+
high_resolution_clock::now().time_since_epoch())
133+
.count();
134+
}
120135
}
121136

122137
ur_event_handle_t event_impl::getHandle() const { return MEvent.load(); }
@@ -186,6 +201,16 @@ void event_impl::setQueue(queue_impl &Queue) {
186201
MIsDefaultConstructed = false;
187202
}
188203

204+
void event_impl::setSubmittedQueue(std::weak_ptr<queue_impl> SubmittedQueue) {
205+
MSubmittedQueue = SubmittedQueue;
206+
if (MHostProfilingInfo) {
207+
if (auto QueuePtr = SubmittedQueue.lock()) {
208+
device_impl &Device = QueuePtr->getDeviceImpl();
209+
MHostProfilingInfo->setDevice(&Device);
210+
}
211+
}
212+
}
213+
189214
void *event_impl::instrumentationProlog(std::string &Name, int32_t StreamID,
190215
uint64_t &IId) const {
191216
void *TraceEvent = nullptr;
@@ -476,9 +501,9 @@ event_impl::get_backend_info<info::device::backend_version>() const {
476501
}
477502
#endif
478503

479-
void HostProfilingInfo::start() { StartTime = getTimestamp(); }
504+
void HostProfilingInfo::start() { StartTime = getTimestamp(Device); }
480505

481-
void HostProfilingInfo::end() { EndTime = getTimestamp(); }
506+
void HostProfilingInfo::end() { EndTime = getTimestamp(Device); }
482507

483508
ur_native_handle_t event_impl::getNative() {
484509
if (isHost())
@@ -568,23 +593,10 @@ void event_impl::setSubmissionTime() {
568593
if (!MIsProfilingEnabled && !MProfilingTagEvent)
569594
return;
570595

571-
if (QueueImplPtr Queue = MQueue.lock()) {
572-
try {
573-
MSubmitTime = Queue->getDeviceImpl().getCurrentDeviceTime();
574-
} catch (sycl::exception &e) {
575-
if (e.code() == sycl::errc::feature_not_supported)
576-
throw sycl::exception(
577-
make_error_code(errc::profiling),
578-
std::string("Unable to get command group submission time: ") +
579-
e.what());
580-
std::rethrow_exception(std::current_exception());
581-
}
582-
} else {
583-
// Returning host time
584-
using namespace std::chrono;
585-
MSubmitTime = duration_cast<nanoseconds>(
586-
high_resolution_clock::now().time_since_epoch())
587-
.count();
596+
std::weak_ptr<queue_impl> Queue = isHost() ? MSubmittedQueue : MQueue;
597+
if (QueueImplPtr QueuePtr = Queue.lock()) {
598+
device_impl &Device = QueuePtr->getDeviceImpl();
599+
MSubmitTime = getTimestamp(&Device);
588600
}
589601
}
590602

sycl/source/detail/event_impl.hpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -241,9 +241,7 @@ class event_impl {
241241
/// Sets original queue used for submission.
242242
///
243243
/// @return
244-
void setSubmittedQueue(std::weak_ptr<queue_impl> SubmittedQueue) {
245-
MSubmittedQueue = SubmittedQueue;
246-
};
244+
void setSubmittedQueue(std::weak_ptr<queue_impl> SubmittedQueue);
247245

248246
/// Indicates if this event is not associated with any command and doesn't
249247
/// have native handle.

sycl/test-e2e/Basic/submit_time.cpp

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,30 +46,71 @@ int main(void) {
4646
// All shortcut memory operations use queue_impl::submitMemOpHelper.
4747
// This test covers memcpy as a representative, extend if other operations
4848
// diverge.
49+
uint64_t memcpy_submit_time = 0;
50+
uint64_t memcpy_start_time = 0;
51+
uint64_t memcpy_end_time = 0;
4952
for (int i = 0; i < 5; i++) {
5053
auto memcpy_event = q.memcpy(dest, data, sizeof(int) * n);
5154
memcpy_event.wait();
5255

53-
auto submit_time =
56+
auto memcpy_submit_time =
5457
memcpy_event
5558
.get_profiling_info<sycl::info::event_profiling::command_submit>();
56-
auto start_time =
59+
auto memcpy_start_time =
5760
memcpy_event
5861
.get_profiling_info<sycl::info::event_profiling::command_start>();
59-
auto end_time =
62+
auto memcpy_end_time =
6063
memcpy_event
6164
.get_profiling_info<sycl::info::event_profiling::command_end>();
6265

6366
// Print for debugging
64-
std::cout << "Memcpy Event - Submit: " << submit_time
65-
<< ", Start: " << start_time << ", End: " << end_time
66-
<< std::endl;
67+
std::cout << "Memcpy Event - Submit: " << memcpy_submit_time
68+
<< ", Start: " << memcpy_start_time
69+
<< ", End: " << memcpy_end_time << std::endl;
6770

68-
assert(submit_time != 0 && "Submit time should not be zero");
69-
assert((submit_time <= start_time) && (start_time <= end_time));
71+
assert(memcpy_submit_time != 0 && "Submit time should not be zero");
72+
assert((memcpy_submit_time <= memcpy_start_time) &&
73+
(memcpy_start_time <= memcpy_end_time));
7074
}
7175

7276
sycl::free(data, q);
7377
sycl::free(dest, q);
78+
79+
// Check that host_task profiling timestamps share the same base as device
80+
// tasks.
81+
auto host_task_event =
82+
q.submit([&](sycl::handler &cgh) { cgh.host_task([=]() {}); });
83+
q.wait();
84+
auto device_task_event = q.submit([&](sycl::handler &cgh) {
85+
cgh.parallel_for<class DeviceTask>(sycl::range<1>(1), [=](sycl::id<1>) {});
86+
});
87+
88+
const uint64_t host_submitted = host_task_event.template get_profiling_info<
89+
sycl::info::event_profiling::command_submit>();
90+
const uint64_t host_start = host_task_event.template get_profiling_info<
91+
sycl::info::event_profiling::command_start>();
92+
const uint64_t host_end = host_task_event.template get_profiling_info<
93+
sycl::info::event_profiling::command_end>();
94+
std::cout << "Submit time: " << host_submitted
95+
<< ", Start time: " << host_start << ", End time: " << host_end
96+
<< std::endl;
97+
98+
const uint64_t device_submitted =
99+
device_task_event.template get_profiling_info<
100+
sycl::info::event_profiling::command_submit>();
101+
const uint64_t device_start = device_task_event.template get_profiling_info<
102+
sycl::info::event_profiling::command_start>();
103+
const uint64_t device_end = device_task_event.template get_profiling_info<
104+
sycl::info::event_profiling::command_end>();
105+
std::cout << "Device Submit time: " << device_submitted
106+
<< ", Device Start time: " << device_start
107+
<< ", Device End time: " << device_end << std::endl;
108+
assert(host_submitted >= memcpy_submit_time &&
109+
host_submitted <= device_submitted &&
110+
"Host and device submit expected to share the same base");
111+
assert(host_start >= memcpy_start_time && host_start <= device_start &&
112+
"Host and device start expected to share the same base");
113+
assert(host_end >= memcpy_end_time && host_end <= device_end &&
114+
"Host and device end expected to share the same base");
74115
return 0;
75116
}

sycl/test/abi/sycl_symbols_windows.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4380,6 +4380,7 @@
43804380
?setArgHelper@handler@_V1@sycl@@AEAAXHAEAVwork_group_memory_impl@detail@23@@Z
43814381
?setArgsHelper@handler@_V1@sycl@@AEAAXH@Z
43824382
?setArgsToAssociatedAccessors@handler@_V1@sycl@@AEAAXXZ
4383+
?setDevice@HostProfilingInfo@detail@_V1@sycl@@QEAAXPEAVdevice_impl@234@@Z
43834384
?setHandlerKernelBundle@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@@Z
43844385
?setHandlerKernelBundle@handler@_V1@sycl@@AEAAXVkernel@23@@Z
43854386
?setKernelCacheConfig@handler@_V1@sycl@@AEAAXW4StableKernelCacheConfig@123@@Z

0 commit comments

Comments
 (0)