Skip to content

Commit 762fa4c

Browse files
committed
[SYCL] Fix computation of the submit_time based on host timestamps
getCurrentDeviceTime uses the following formula to calculate device time: [base device timestamp] + ([current host timestamp] - [base host timestamp]) Host time stamps are queried using std::chrono. Base host timestamp is updated in two cases: 1. The first call to getCurrentDeviceTime. 2. When refresh is needed. Problem is that currently we remember base host timestamp at the wrong moment: there is a large gap between the point when we query and remember base host timestamp and the point when we get device timestamp from plugin, so ([current host timestamp] - [base host timestamp]) includes execution time of things like getPlugin() which may be significant, especially on the first call when plugins initialization happen. As a result we add incorrect difference to the base device time and calculated submission time is incorrect, it is sometimes greater than start time. This patch fixes that problem by querying base host time properly after piGetDeviceAndHostTimer call.
1 parent 197c33a commit 762fa4c

File tree

3 files changed

+37
-25
lines changed

3 files changed

+37
-25
lines changed

sycl/source/detail/device_impl.cpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -599,8 +599,8 @@ ext::oneapi::experimental::architecture device_impl::getDeviceArch() const {
599599
return MDeviceArch;
600600
}
601601

602-
// On first call this function queries for device timestamp
603-
// along with host synchronized timestamp and stores it in memeber varaible
602+
// On the first call this function queries for device timestamp
603+
// along with host synchronized timestamp and stores it in member variable
604604
// MDeviceHostBaseTime. Subsequent calls to this function would just retrieve
605605
// the host timestamp, compute difference against the host timestamp in
606606
// MDeviceHostBaseTime and calculate the device timestamp based on the
@@ -622,14 +622,28 @@ uint64_t device_impl::getCurrentDeviceTime() {
622622
// To account for potential clock drift between host clock and device clock.
623623
// The value set is arbitrary: 200 seconds
624624
constexpr uint64_t TimeTillRefresh = 200e9;
625+
assert(HostTime >= MDeviceHostBaseTime.second);
625626
uint64_t Diff = HostTime - MDeviceHostBaseTime.second;
626627

627-
if (Diff > TimeTillRefresh || Diff <= 0) {
628+
// If getCurrentDeviceTime is called for the first time or we have to refresh.
629+
if (!MDeviceHostBaseTime.second || Diff > TimeTillRefresh) {
628630
const auto &Plugin = getPlugin();
629631
auto Result =
630632
Plugin->call_nocheck<detail::PiApiKind::piGetDeviceAndHostTimer>(
631633
MDevice, &MDeviceHostBaseTime.first, &MDeviceHostBaseTime.second);
632-
634+
// We have to remember base host timestamp right after PI call and it is
635+
// going to be used for calculation of the device timestamp at the next
636+
// getCurrentDeviceTime() call. We need to do it here because getPlugin()
637+
// and piGetDeviceAndHostTimer calls may take significant amount of time,
638+
// for example on the first call to getPlugin plugins may beed to be
639+
// initialized. If we use timestamp from the beginning of the function then
640+
// the difference between host timestamps of the current
641+
// getCurrentDeviceTime and the next getCurrentDeviceTime will be incorrect
642+
// because it will include execution time of the code before we get device
643+
// timestamp from piGetDeviceAndHostTimer.
644+
HostTime =
645+
duration_cast<nanoseconds>(steady_clock::now().time_since_epoch())
646+
.count();
633647
if (Result == PI_ERROR_INVALID_OPERATION) {
634648
char *p = nullptr;
635649
Plugin->call_nocheck<detail::PiApiKind::piPluginGetLastError>(&p);

sycl/source/detail/device_impl.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ class device_impl {
272272
mutable std::once_flag MDeviceNameFlag;
273273
mutable ext::oneapi::experimental::architecture MDeviceArch{};
274274
mutable std::once_flag MDeviceArchFlag;
275-
std::pair<uint64_t, uint64_t> MDeviceHostBaseTime;
275+
std::pair<uint64_t, uint64_t> MDeviceHostBaseTime{0, 0};
276276
}; // class device_impl
277277

278278
} // namespace detail

sycl/test-e2e/Basic/submit_time.cpp

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,32 @@
11
// RUN: %{build} -o %t.out
22
// RUN: %{run} %t.out
33

4-
// Check that device_impl::isGetDeviceAndHostTimerSupported() is not spoiling
5-
// device_impl::MDeviceHostBaseTime values used for submit timestamp
6-
// calculation.
4+
// Check that submission time is calculated properly.
75

86
#include <sycl/sycl.hpp>
97

108
using namespace sycl;
119

1210
int main(void) {
13-
sycl::queue queue(
14-
sycl::property_list{sycl::property::queue::enable_profiling()});
15-
sycl::event event = queue.submit([&](sycl::handler &cgh) {
16-
cgh.parallel_for<class set_value>(sycl::range<1>{1024},
17-
[=](sycl::id<1> idx) {});
18-
});
11+
queue q({property::queue::enable_profiling{}});
12+
int *data = malloc_host<int>(1024, q);
1913

20-
// SYCL RT internally calls device_impl::isGetDeviceAndHostTimerSupported()
21-
// to decide how to calculate "submit" timestamp - either using backend API
22-
// call or using fallback implementation.
23-
auto submit =
24-
event.get_profiling_info<sycl::info::event_profiling::command_submit>();
25-
auto start =
26-
event.get_profiling_info<sycl::info::event_profiling::command_start>();
27-
auto end =
28-
event.get_profiling_info<sycl::info::event_profiling::command_end>();
14+
for (int i = 0; i < 20; i++) {
15+
auto event = q.submit([&](sycl::handler &cgh) {
16+
cgh.parallel_for<class KernelTime>(sycl::range<1>(1024),
17+
[=](id<1> idx) { data[idx] = idx; });
18+
});
2919

30-
if (!(submit <= start) || !(start <= end))
31-
return -1;
20+
event.wait();
21+
auto submit_time =
22+
event.get_profiling_info<sycl::info::event_profiling::command_submit>();
23+
auto start_time =
24+
event.get_profiling_info<sycl::info::event_profiling::command_start>();
25+
auto end_time =
26+
event.get_profiling_info<sycl::info::event_profiling::command_end>();
3227

28+
if (!(submit_time <= start_time) || !(start_time <= end_time))
29+
return -1;
30+
}
3331
return 0;
3432
}

0 commit comments

Comments
 (0)