Skip to content

Commit 2547563

Browse files
authored
[SYCL] Fix computation of the submit time based on host timestamps (#12104)
getCurrentDeviceTime uses the following formula to calculate device time: [base device timestamp] + ([current host timestamp] - [base host timestamp]). Host time stamps are queried using std::chrono. And base host timestamp is updated in two cases: 1. The first call to getCurrentDeviceTime. 2. When refresh is needed. Problem is that currently we remember base host timestamp at the wrong moment: there is a large gap between the point when we query and remember base host timestamp and the point when we get device timestamp from plugin, so ([current host timestamp] - [base host timestamp]) includes execution time of things like getPlugin() which may be significant, especially on the first call when plugins initialization happen. As a result we add incorrect difference to the base device time and calculated submission time is incorrect, it is sometimes greater than command_start time. This patch fixes that problem by querying base host time properly after piGetDeviceAndHostTimer call.
1 parent 17ab095 commit 2547563

File tree

3 files changed

+40
-28
lines changed

3 files changed

+40
-28
lines changed

sycl/source/detail/device_impl.cpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -599,8 +599,8 @@ ext::oneapi::experimental::architecture device_impl::getDeviceArch() const {
599599
return MDeviceArch;
600600
}
601601

602-
// On first call this function queries for device timestamp
603-
// along with host synchronized timestamp and stores it in memeber varaible
602+
// On the first call this function queries for device timestamp
603+
// along with host synchronized timestamp and stores it in member variable
604604
// MDeviceHostBaseTime. Subsequent calls to this function would just retrieve
605605
// the host timestamp, compute difference against the host timestamp in
606606
// MDeviceHostBaseTime and calculate the device timestamp based on the
@@ -622,14 +622,28 @@ uint64_t device_impl::getCurrentDeviceTime() {
622622
// To account for potential clock drift between host clock and device clock.
623623
// The value set is arbitrary: 200 seconds
624624
constexpr uint64_t TimeTillRefresh = 200e9;
625+
assert(HostTime >= MDeviceHostBaseTime.second);
625626
uint64_t Diff = HostTime - MDeviceHostBaseTime.second;
626627

627-
if (Diff > TimeTillRefresh || Diff <= 0) {
628+
// If getCurrentDeviceTime is called for the first time or we have to refresh.
629+
if (!MDeviceHostBaseTime.second || Diff > TimeTillRefresh) {
628630
const auto &Plugin = getPlugin();
629631
auto Result =
630632
Plugin->call_nocheck<detail::PiApiKind::piGetDeviceAndHostTimer>(
631633
MDevice, &MDeviceHostBaseTime.first, &MDeviceHostBaseTime.second);
632-
634+
// We have to remember base host timestamp right after PI call and it is
635+
// going to be used for calculation of the device timestamp at the next
636+
// getCurrentDeviceTime() call. We need to do it here because getPlugin()
637+
// and piGetDeviceAndHostTimer calls may take significant amount of time,
638+
// for example on the first call to getPlugin plugins may need to be
639+
// initialized. If we use timestamp from the beginning of the function then
640+
// the difference between host timestamps of the current
641+
// getCurrentDeviceTime and the next getCurrentDeviceTime will be incorrect
642+
// because it will include execution time of the code before we get device
643+
// timestamp from piGetDeviceAndHostTimer.
644+
HostTime =
645+
duration_cast<nanoseconds>(steady_clock::now().time_since_epoch())
646+
.count();
633647
if (Result == PI_ERROR_INVALID_OPERATION) {
634648
char *p = nullptr;
635649
Plugin->call_nocheck<detail::PiApiKind::piPluginGetLastError>(&p);

sycl/source/detail/device_impl.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ class device_impl {
274274
mutable std::once_flag MDeviceNameFlag;
275275
mutable ext::oneapi::experimental::architecture MDeviceArch{};
276276
mutable std::once_flag MDeviceArchFlag;
277-
std::pair<uint64_t, uint64_t> MDeviceHostBaseTime;
277+
std::pair<uint64_t, uint64_t> MDeviceHostBaseTime{0, 0};
278278
}; // class device_impl
279279

280280
} // namespace detail

sycl/test-e2e/Basic/submit_time.cpp

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,32 @@
11
// RUN: %{build} -o %t.out
2-
// RUN: %{run} %t.out
2+
// There is an issue with reported device time for the L0 backend, works only on
3+
// pvc for now. No such problems for other backends.
4+
// RUN: %if (!ext_oneapi_level_zero || gpu-intel-pvc) %{ %{run} %t.out %}
35

4-
// Check that device_impl::isGetDeviceAndHostTimerSupported() is not spoiling
5-
// device_impl::MDeviceHostBaseTime values used for submit timestamp
6-
// calculation.
6+
// Check that submission time is calculated properly.
77

88
#include <sycl/sycl.hpp>
99

10-
using namespace sycl;
11-
1210
int main(void) {
13-
sycl::queue queue(
14-
sycl::property_list{sycl::property::queue::enable_profiling()});
15-
sycl::event event = queue.submit([&](sycl::handler &cgh) {
16-
cgh.parallel_for<class set_value>(sycl::range<1>{1024},
17-
[=](sycl::id<1> idx) {});
18-
});
11+
sycl::queue q({sycl::property::queue::enable_profiling{}});
12+
int *data = malloc_host<int>(1024, q);
1913

20-
// SYCL RT internally calls device_impl::isGetDeviceAndHostTimerSupported()
21-
// to decide how to calculate "submit" timestamp - either using backend API
22-
// call or using fallback implementation.
23-
auto submit =
24-
event.get_profiling_info<sycl::info::event_profiling::command_submit>();
25-
auto start =
26-
event.get_profiling_info<sycl::info::event_profiling::command_start>();
27-
auto end =
28-
event.get_profiling_info<sycl::info::event_profiling::command_end>();
14+
for (int i = 0; i < 20; i++) {
15+
auto event = q.submit([&](sycl::handler &cgh) {
16+
cgh.parallel_for<class KernelTime>(
17+
sycl::range<1>(1024), [=](sycl::id<1> idx) { data[idx] = idx; });
18+
});
2919

30-
if (!(submit <= start) || !(start <= end))
31-
return -1;
20+
event.wait();
21+
auto submit_time =
22+
event.get_profiling_info<sycl::info::event_profiling::command_submit>();
23+
auto start_time =
24+
event.get_profiling_info<sycl::info::event_profiling::command_start>();
25+
auto end_time =
26+
event.get_profiling_info<sycl::info::event_profiling::command_end>();
3227

28+
assert((submit_time <= start_time) && (start_time <= end_time));
29+
}
30+
sycl::free(data, q);
3331
return 0;
3432
}

0 commit comments

Comments
 (0)