Skip to content

Commit 7359edb

Browse files
authored
[Flang][runtime] Distinguish CPU time and elapsed time for cpu_time and system_clock (#96652)
The current implementation of `system_clock()` returns CPU time instead of elapsed wall-clock time. This PR fixes the issue so that `system_clock()` correctly returns elapsed time.
1 parent e414bf9 commit 7359edb

File tree

2 files changed

+64
-51
lines changed

2 files changed

+64
-51
lines changed

flang/runtime/time-intrinsic.cpp

Lines changed: 62 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -64,20 +64,29 @@ template <typename Unused = void> double GetCpuTime(fallback_implementation) {
6464
// clock_gettime is implemented in the pthread library for MinGW.
6565
// Using it here would mean that all programs that link libFortranRuntime are
6666
// required to also link to pthread. Instead, don't use the function.
67-
#undef CLOCKID
68-
#elif defined CLOCK_PROCESS_CPUTIME_ID
69-
#define CLOCKID CLOCK_PROCESS_CPUTIME_ID
67+
#undef CLOCKID_CPU_TIME
68+
#undef CLOCKID_ELAPSED_TIME
69+
#else
70+
// Determine what clock to use for CPU time.
71+
#if defined CLOCK_PROCESS_CPUTIME_ID
72+
#define CLOCKID_CPU_TIME CLOCK_PROCESS_CPUTIME_ID
7073
#elif defined CLOCK_THREAD_CPUTIME_ID
71-
#define CLOCKID CLOCK_THREAD_CPUTIME_ID
72-
#elif defined CLOCK_MONOTONIC
73-
#define CLOCKID CLOCK_MONOTONIC
74+
#define CLOCKID_CPU_TIME CLOCK_THREAD_CPUTIME_ID
75+
#else
76+
#undef CLOCKID_CPU_TIME
77+
#endif
78+
79+
// Determine what clock to use for elapsed time.
80+
#if defined CLOCK_MONOTONIC
81+
#define CLOCKID_ELAPSED_TIME CLOCK_MONOTONIC
7482
#elif defined CLOCK_REALTIME
75-
#define CLOCKID CLOCK_REALTIME
83+
#define CLOCKID_ELAPSED_TIME CLOCK_REALTIME
7684
#else
77-
#undef CLOCKID
85+
#undef CLOCKID_ELAPSED_TIME
86+
#endif
7887
#endif
7988

80-
#ifdef CLOCKID
89+
#ifdef CLOCKID_CPU_TIME
8190
// POSIX implementation using clock_gettime. This is only enabled where
8291
// clock_gettime is available.
8392
template <typename T = int, typename U = struct timespec>
@@ -86,17 +95,26 @@ double GetCpuTime(preferred_implementation,
8695
T ClockId = 0, U *Timespec = nullptr,
8796
decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) {
8897
struct timespec tspec;
89-
if (clock_gettime(CLOCKID, &tspec) == 0) {
98+
if (clock_gettime(CLOCKID_CPU_TIME, &tspec) == 0) {
9099
return tspec.tv_nsec * 1.0e-9 + tspec.tv_sec;
91100
}
92101
// Return some negative value to represent failure.
93102
return -1.0;
94103
}
95-
#endif
104+
#endif // CLOCKID_CPU_TIME
96105

97106
using count_t = std::int64_t;
98107
using unsigned_count_t = std::uint64_t;
99108

109+
// POSIX implementation using clock_gettime where available. The clock_gettime
110+
// result is in nanoseconds, which is converted as necessary to
111+
// - deciseconds for kind 1
112+
// - milliseconds for kinds 2, 4
113+
// - nanoseconds for kinds 8, 16
114+
constexpr unsigned_count_t DS_PER_SEC{10u};
115+
constexpr unsigned_count_t MS_PER_SEC{1'000u};
116+
constexpr unsigned_count_t NS_PER_SEC{1'000'000'000u};
117+
100118
// Computes HUGE(INT(0,kind)) as an unsigned integer value.
101119
static constexpr inline unsigned_count_t GetHUGE(int kind) {
102120
if (kind > 8) {
@@ -105,72 +123,65 @@ static constexpr inline unsigned_count_t GetHUGE(int kind) {
105123
return (unsigned_count_t{1} << ((8 * kind) - 1)) - 1;
106124
}
107125

108-
// This is the fallback implementation, which should work everywhere. Note that
109-
// in general we can't recover after std::clock has reached its maximum value.
126+
// Function converts a std::timespec_t into the desired count to
127+
// be returned by the timing functions in accordance with the requested
128+
// kind at the call site.
129+
count_t ConvertTimeSpecToCount(int kind, const std::timespec &tspec) {
130+
const unsigned_count_t huge{GetHUGE(kind)};
131+
unsigned_count_t sec{static_cast<unsigned_count_t>(tspec.tv_sec)};
132+
unsigned_count_t nsec{static_cast<unsigned_count_t>(tspec.tv_nsec)};
133+
if (kind >= 8) {
134+
return (sec * NS_PER_SEC + nsec) % (huge + 1);
135+
} else if (kind >= 2) {
136+
return (sec * MS_PER_SEC + (nsec / (NS_PER_SEC / MS_PER_SEC))) % (huge + 1);
137+
} else { // kind == 1
138+
return (sec * DS_PER_SEC + (nsec / (NS_PER_SEC / DS_PER_SEC))) % (huge + 1);
139+
}
140+
}
141+
142+
// This is the fallback implementation, which should work everywhere.
110143
template <typename Unused = void>
111144
count_t GetSystemClockCount(int kind, fallback_implementation) {
112-
std::clock_t timestamp{std::clock()};
113-
if (timestamp == static_cast<std::clock_t>(-1)) {
145+
std::timespec tspec;
146+
147+
if (std::timespec_get(&tspec, TIME_UTC) < 0) {
114148
// Return -HUGE(COUNT) to represent failure.
115149
return -static_cast<count_t>(GetHUGE(kind));
116150
}
117-
// Convert the timestamp to std::uint64_t with wrap-around. The timestamp is
118-
// most likely a floating-point value (since C'11), so compute the modulus
119-
// carefully when one is required.
120-
constexpr auto maxUnsignedCount{std::numeric_limits<unsigned_count_t>::max()};
121-
if constexpr (std::numeric_limits<std::clock_t>::max() > maxUnsignedCount) {
122-
timestamp -= maxUnsignedCount * std::floor(timestamp / maxUnsignedCount);
123-
}
124-
unsigned_count_t unsignedCount{static_cast<unsigned_count_t>(timestamp)};
125-
// Return the modulus of the unsigned integral count with HUGE(COUNT)+1.
126-
// The result is a signed integer but never negative.
127-
return static_cast<count_t>(unsignedCount % (GetHUGE(kind) + 1));
151+
152+
// Compute the timestamp as seconds plus nanoseconds in accordance
153+
// with the requested kind at the call site.
154+
return ConvertTimeSpecToCount(kind, tspec);
128155
}
129156

130157
template <typename Unused = void>
131158
count_t GetSystemClockCountRate(int kind, fallback_implementation) {
132-
return CLOCKS_PER_SEC;
159+
return kind >= 8 ? NS_PER_SEC : kind >= 2 ? MS_PER_SEC : DS_PER_SEC;
133160
}
134161

135162
template <typename Unused = void>
136163
count_t GetSystemClockCountMax(int kind, fallback_implementation) {
137-
constexpr auto max_clock_t{std::numeric_limits<std::clock_t>::max()};
138164
unsigned_count_t maxCount{GetHUGE(kind)};
139-
return max_clock_t <= maxCount ? static_cast<count_t>(max_clock_t)
140-
: static_cast<count_t>(maxCount);
165+
return maxCount;
141166
}
142167

143-
// POSIX implementation using clock_gettime where available. The clock_gettime
144-
// result is in nanoseconds, which is converted as necessary to
145-
// - deciseconds for kind 1
146-
// - milliseconds for kinds 2, 4
147-
// - nanoseconds for kinds 8, 16
148-
constexpr unsigned_count_t DS_PER_SEC{10u};
149-
constexpr unsigned_count_t MS_PER_SEC{1'000u};
150-
constexpr unsigned_count_t NS_PER_SEC{1'000'000'000u};
151-
152-
#ifdef CLOCKID
168+
#ifdef CLOCKID_ELAPSED_TIME
153169
template <typename T = int, typename U = struct timespec>
154170
count_t GetSystemClockCount(int kind, preferred_implementation,
155171
// We need some dummy parameters to pass to decltype(clock_gettime).
156172
T ClockId = 0, U *Timespec = nullptr,
157173
decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) {
158174
struct timespec tspec;
159175
const unsigned_count_t huge{GetHUGE(kind)};
160-
if (clock_gettime(CLOCKID, &tspec) != 0) {
176+
if (clock_gettime(CLOCKID_ELAPSED_TIME, &tspec) != 0) {
161177
return -huge; // failure
162178
}
163-
unsigned_count_t sec{static_cast<unsigned_count_t>(tspec.tv_sec)};
164-
unsigned_count_t nsec{static_cast<unsigned_count_t>(tspec.tv_nsec)};
165-
if (kind >= 8) {
166-
return (sec * NS_PER_SEC + nsec) % (huge + 1);
167-
} else if (kind >= 2) {
168-
return (sec * MS_PER_SEC + (nsec / (NS_PER_SEC / MS_PER_SEC))) % (huge + 1);
169-
} else { // kind == 1
170-
return (sec * DS_PER_SEC + (nsec / (NS_PER_SEC / DS_PER_SEC))) % (huge + 1);
171-
}
179+
180+
// Compute the timestamp as seconds plus nanoseconds in accordance
181+
// with the requested kind at the call site.
182+
return ConvertTimeSpecToCount(kind, tspec);
172183
}
173-
#endif
184+
#endif // CLOCKID_ELAPSED_TIME
174185

175186
template <typename T = int, typename U = struct timespec>
176187
count_t GetSystemClockCountRate(int kind, preferred_implementation,

flang/test/Runtime/no-cpp-dep.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,14 @@ int32_t RTNAME(ArgumentCount)();
3030
int32_t RTNAME(GetCommandArgument)(int32_t, const struct Descriptor *,
3131
const struct Descriptor *, const struct Descriptor *);
3232
int32_t RTNAME(GetEnvVariable)();
33+
int64_t RTNAME(SystemClockCount)(int kind);
3334

3435
int main() {
3536
double x = RTNAME(CpuTime)();
3637
RTNAME(ProgramStart)(0, 0, 0, 0);
3738
int32_t c = RTNAME(ArgumentCount)();
3839
int32_t v = RTNAME(GetCommandArgument)(0, 0, 0, 0);
3940
int32_t e = RTNAME(GetEnvVariable)("FOO", 0, 0);
41+
int64_t t = RTNAME(SystemClockCount)(8);
4042
return x + c + v + e;
4143
}

0 commit comments

Comments
 (0)