Skip to content

Commit 1dacfd1

Browse files
authored
[libc] Round up time for GPU nanosleep implementation (#81630)
Summary: The GPU `nanosleep` tests would occasionally fail because we used integer division to determine how many ticks we had to sleep for. The division truncates, leaving us with a tick count that corresponds to slightly less than the requested time, so the function would occasionally return `-1`. This patch changes the code to round the tick count up to the next whole tick (ceiling division) so we always sleep for at least the requested duration.
1 parent d79c3c5 commit 1dacfd1

File tree

1 file changed

+8
-7
lines changed

1 file changed

+8
-7
lines changed

libc/src/time/gpu/nanosleep.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,19 @@
1212

1313
namespace LIBC_NAMESPACE {
1414

15-
constexpr uint64_t TICKS_PER_NS = 1000000000UL;
15+
constexpr uint64_t TICKS_PER_SEC = 1000000000UL;
1616

1717
LLVM_LIBC_FUNCTION(int, nanosleep,
1818
(const struct timespec *req, struct timespec *rem)) {
1919
if (!GPU_CLOCKS_PER_SEC || !req)
2020
return -1;
2121

22-
uint64_t nsecs = req->tv_nsec + req->tv_sec * TICKS_PER_NS;
22+
uint64_t nsecs = req->tv_nsec + req->tv_sec * TICKS_PER_SEC;
23+
uint64_t tick_rate = TICKS_PER_SEC / GPU_CLOCKS_PER_SEC;
2324

2425
uint64_t start = gpu::fixed_frequency_clock();
2526
#if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700
26-
uint64_t end = start + nsecs / (TICKS_PER_NS / GPU_CLOCKS_PER_SEC);
27+
uint64_t end = start + (nsecs + tick_rate - 1) / tick_rate;
2728
uint64_t cur = gpu::fixed_frequency_clock();
2829
// The NVPTX architecture supports sleeping and guarantees the actual time
2930
// slept will be somewhere between zero and twice the requested amount. Here
@@ -34,7 +35,7 @@ LLVM_LIBC_FUNCTION(int, nanosleep,
3435
nsecs -= nsecs > cur - start ? cur - start : 0;
3536
}
3637
#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
37-
uint64_t end = start + nsecs / (TICKS_PER_NS / GPU_CLOCKS_PER_SEC);
38+
uint64_t end = start + (nsecs + tick_rate - 1) / tick_rate;
3839
uint64_t cur = gpu::fixed_frequency_clock();
3940
// The AMDGPU architecture does not provide a sleep implementation with a
4041
// known delay so we simply repeatedly sleep with a large value of ~960 clock
@@ -56,11 +57,11 @@ LLVM_LIBC_FUNCTION(int, nanosleep,
5657

5758
// Check to make sure we slept for at least the desired duration and set the
5859
// remaining time if not.
59-
uint64_t elapsed = (stop - start) * (TICKS_PER_NS / GPU_CLOCKS_PER_SEC);
60+
uint64_t elapsed = (stop - start) * tick_rate;
6061
if (elapsed < nsecs) {
6162
if (rem) {
62-
rem->tv_sec = (nsecs - elapsed) / TICKS_PER_NS;
63-
rem->tv_nsec = (nsecs - elapsed) % TICKS_PER_NS;
63+
rem->tv_sec = (nsecs - elapsed) / TICKS_PER_SEC;
64+
rem->tv_nsec = (nsecs - elapsed) % TICKS_PER_SEC;
6465
}
6566
return -1;
6667
}

0 commit comments

Comments
 (0)