Skip to content

[Flang][runtime] Distinguish CPU time and elapsed time for cpu_time and system_clock #96652

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jul 2, 2024
113 changes: 62 additions & 51 deletions flang/runtime/time-intrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,20 +64,29 @@ template <typename Unused = void> double GetCpuTime(fallback_implementation) {
// clock_gettime is implemented in the pthread library for MinGW.
// Using it here would mean that all programs that link libFortranRuntime are
// required to also link to pthread. Instead, don't use the function.
#undef CLOCKID
#elif defined CLOCK_PROCESS_CPUTIME_ID
#define CLOCKID CLOCK_PROCESS_CPUTIME_ID
#undef CLOCKID_CPU_TIME
#undef CLOCKID_ELAPSED_TIME
#else
// Determine what clock to use for CPU time.
#if defined CLOCK_PROCESS_CPUTIME_ID
#define CLOCKID_CPU_TIME CLOCK_PROCESS_CPUTIME_ID
#elif defined CLOCK_THREAD_CPUTIME_ID
#define CLOCKID CLOCK_THREAD_CPUTIME_ID
#elif defined CLOCK_MONOTONIC
#define CLOCKID CLOCK_MONOTONIC
#define CLOCKID_CPU_TIME CLOCK_THREAD_CPUTIME_ID
#else
#undef CLOCKID_CPU_TIME
#endif

// Determine what clock to use for elapsed time.
#if defined CLOCK_MONOTONIC
#define CLOCKID_ELAPSED_TIME CLOCK_MONOTONIC
#elif defined CLOCK_REALTIME
#define CLOCKID CLOCK_REALTIME
#define CLOCKID_ELAPSED_TIME CLOCK_REALTIME
#else
#undef CLOCKID
#undef CLOCKID_ELAPSED_TIME
#endif
#endif

#ifdef CLOCKID_CPU_TIME
// POSIX implementation using clock_gettime. This is only enabled where
// clock_gettime is available.
// Reads the clock selected by CLOCKID_CPU_TIME and returns the reading in
// seconds (tv_sec plus tv_nsec scaled by 1e-9). Returns a negative value when
// clock_gettime reports failure.
template <typename T = int, typename U = struct timespec>
double GetCpuTime(preferred_implementation,
    // We need some dummy parameters to pass to decltype(clock_gettime).
    T ClockId = 0, U *Timespec = nullptr,
    decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) {
  struct timespec tspec;
  if (clock_gettime(CLOCKID_CPU_TIME, &tspec) == 0) {
    return tspec.tv_nsec * 1.0e-9 + tspec.tv_sec;
  }
  // Return some negative value to represent failure.
  return -1.0;
}
#endif // CLOCKID_CPU_TIME

// Signed 64-bit type used for the counts returned by the system-clock helpers.
using count_t = std::int64_t;
// Unsigned 64-bit counterpart used for the intermediate arithmetic.
using unsigned_count_t = std::uint64_t;

// POSIX implementation using clock_gettime where available. The clock_gettime
// result is in nanoseconds, which is converted as necessary to
// - deciseconds for kind 1
// - milliseconds for kinds 2, 4
// - nanoseconds for kinds 8, 16
// The constants below are the number of ticks per second at each of those
// resolutions.
constexpr unsigned_count_t DS_PER_SEC{10u};
constexpr unsigned_count_t MS_PER_SEC{1'000u};
constexpr unsigned_count_t NS_PER_SEC{1'000'000'000u};

// Computes HUGE(INT(0,kind)) as an unsigned integer value.
static constexpr inline unsigned_count_t GetHUGE(int kind) {
if (kind > 8) {
Expand All @@ -105,72 +123,65 @@ static constexpr inline unsigned_count_t GetHUGE(int kind) {
return (unsigned_count_t{1} << ((8 * kind) - 1)) - 1;
}

// This is the fallback implementation, which should work everywhere. Note that
// in general we can't recover after std::clock has reached its maximum value.
// Converts a std::timespec into the tick count reported by the timing
// functions for the integer kind requested at the call site:
// - kind 1     -> deciseconds
// - kinds 2..7 -> milliseconds
// - kinds >= 8 -> nanoseconds
// The tick count is reduced modulo HUGE(INT(0,kind)) + 1 before it is
// returned.
count_t ConvertTimeSpecToCount(int kind, const std::timespec &tspec) {
  const auto wholeSeconds{static_cast<unsigned_count_t>(tspec.tv_sec)};
  const auto nanoseconds{static_cast<unsigned_count_t>(tspec.tv_nsec)};
  const unsigned_count_t modulus{GetHUGE(kind) + 1};

  unsigned_count_t ticks{0};
  if (kind < 2) { // kind == 1
    ticks = wholeSeconds * DS_PER_SEC + (nanoseconds / (NS_PER_SEC / DS_PER_SEC));
  } else if (kind < 8) {
    ticks = wholeSeconds * MS_PER_SEC + (nanoseconds / (NS_PER_SEC / MS_PER_SEC));
  } else {
    ticks = wholeSeconds * NS_PER_SEC + nanoseconds;
  }
  return ticks % modulus;
}

// This is the fallback implementation, which should work everywhere.
// It reads the current time with std::timespec_get(TIME_UTC) and converts it
// to a count in the units that match the requested kind. Returns
// -HUGE(COUNT) when the time cannot be obtained.
template <typename Unused = void>
count_t GetSystemClockCount(int kind, fallback_implementation) {
  std::timespec tspec;

  // std::timespec_get returns its base argument on success and zero on
  // failure; it never returns a negative value, so test against zero.
  if (std::timespec_get(&tspec, TIME_UTC) == 0) {
    // Return -HUGE(COUNT) to represent failure.
    return -static_cast<count_t>(GetHUGE(kind));
  }

  // Compute the timestamp as seconds plus nanoseconds in accordance
  // with the requested kind at the call site.
  return ConvertTimeSpecToCount(kind, tspec);
}

// Fallback count rate: the number of ticks per second for the requested kind.
// This matches the units in which ConvertTimeSpecToCount reports the count
// (nanoseconds for kind >= 8, milliseconds for kinds 2..7, deciseconds for
// kind 1).
template <typename Unused = void>
count_t GetSystemClockCountRate(int kind, fallback_implementation) {
  return kind >= 8 ? NS_PER_SEC : kind >= 2 ? MS_PER_SEC : DS_PER_SEC;
}

// Fallback count maximum: HUGE(INT(0,kind)). The count is reduced modulo
// HUGE + 1 by ConvertTimeSpecToCount, so HUGE is the largest value it can
// take.
template <typename Unused = void>
count_t GetSystemClockCountMax(int kind, fallback_implementation) {
  return static_cast<count_t>(GetHUGE(kind));
}

#ifdef CLOCKID_ELAPSED_TIME
// POSIX implementation using clock_gettime where available. The clock
// selected by CLOCKID_ELAPSED_TIME is read as seconds plus nanoseconds, which
// ConvertTimeSpecToCount converts as necessary to
// - deciseconds for kind 1
// - milliseconds for kinds 2, 4
// - nanoseconds for kinds 8, 16
// Returns -HUGE(COUNT) when clock_gettime reports failure.
template <typename T = int, typename U = struct timespec>
count_t GetSystemClockCount(int kind, preferred_implementation,
    // We need some dummy parameters to pass to decltype(clock_gettime).
    T ClockId = 0, U *Timespec = nullptr,
    decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) {
  struct timespec tspec;
  if (clock_gettime(CLOCKID_ELAPSED_TIME, &tspec) != 0) {
    // Return -HUGE(COUNT) to represent failure.
    return -static_cast<count_t>(GetHUGE(kind));
  }

  // Compute the timestamp as seconds plus nanoseconds in accordance
  // with the requested kind at the call site.
  return ConvertTimeSpecToCount(kind, tspec);
}
#endif // CLOCKID_ELAPSED_TIME

template <typename T = int, typename U = struct timespec>
count_t GetSystemClockCountRate(int kind, preferred_implementation,
Expand Down
2 changes: 2 additions & 0 deletions flang/test/Runtime/no-cpp-dep.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ int32_t RTNAME(ArgumentCount)();
int32_t RTNAME(GetCommandArgument)(int32_t, const struct Descriptor *,
const struct Descriptor *, const struct Descriptor *);
int32_t RTNAME(GetEnvVariable)();
int64_t RTNAME(SystemClockCount)(int kind);

// Calls several runtime entry points through the RTNAME macro and folds most
// of the results into the process exit status.
// NOTE(review): presumably the point is that each symbol resolves when linked
// from plain C (inferred from the file name) -- confirm against the RUN lines.
int main() {
double x = RTNAME(CpuTime)();
RTNAME(ProgramStart)(0, 0, 0, 0);
int32_t c = RTNAME(ArgumentCount)();
int32_t v = RTNAME(GetCommandArgument)(0, 0, 0, 0);
int32_t e = RTNAME(GetEnvVariable)("FOO", 0, 0);
// The argument 8 is the integer kind of the requested count.
// NOTE(review): t is never read; only x, c, v and e feed the return value.
int64_t t = RTNAME(SystemClockCount)(8);
// The double/int sum is implicitly converted to int for the return value.
return x + c + v + e;
}