Skip to content

[libc] Round up time for GPU nanosleep implementation #81630

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 13, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions libc/src/time/gpu/nanosleep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,19 @@

namespace LIBC_NAMESPACE {

constexpr uint64_t TICKS_PER_NS = 1000000000UL;
constexpr uint64_t TICKS_PER_SEC = 1000000000UL;

LLVM_LIBC_FUNCTION(int, nanosleep,
(const struct timespec *req, struct timespec *rem)) {
if (!GPU_CLOCKS_PER_SEC || !req)
return -1;

uint64_t nsecs = req->tv_nsec + req->tv_sec * TICKS_PER_NS;
uint64_t nsecs = req->tv_nsec + req->tv_sec * TICKS_PER_SEC;
uint64_t tick_rate = TICKS_PER_SEC / GPU_CLOCKS_PER_SEC;

uint64_t start = gpu::fixed_frequency_clock();
#if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700
uint64_t end = start + nsecs / (TICKS_PER_NS / GPU_CLOCKS_PER_SEC);
uint64_t end = start + (nsecs + tick_rate - 1) / tick_rate;
uint64_t cur = gpu::fixed_frequency_clock();
// The NVPTX architecture supports sleeping and guarantees the actual time
// slept will be somewhere between zero and twice the requested amount. Here
Expand All @@ -34,7 +35,7 @@ LLVM_LIBC_FUNCTION(int, nanosleep,
nsecs -= nsecs > cur - start ? cur - start : 0;
}
#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
uint64_t end = start + nsecs / (TICKS_PER_NS / GPU_CLOCKS_PER_SEC);
uint64_t end = start + (nsecs + tick_rate - 1) / tick_rate;
uint64_t cur = gpu::fixed_frequency_clock();
// The AMDGPU architecture does not provide a sleep implementation with a
// known delay so we simply repeatedly sleep with a large value of ~960 clock
Expand All @@ -56,11 +57,11 @@ LLVM_LIBC_FUNCTION(int, nanosleep,

// Check to make sure we slept for at least the desired duration and set the
// remaining time if not.
uint64_t elapsed = (stop - start) * (TICKS_PER_NS / GPU_CLOCKS_PER_SEC);
uint64_t elapsed = (stop - start) * tick_rate;
if (elapsed < nsecs) {
if (rem) {
rem->tv_sec = (nsecs - elapsed) / TICKS_PER_NS;
rem->tv_nsec = (nsecs - elapsed) % TICKS_PER_NS;
rem->tv_sec = (nsecs - elapsed) / TICKS_PER_SEC;
rem->tv_nsec = (nsecs - elapsed) % TICKS_PER_SEC;
}
return -1;
}
Expand Down