Skip to content

Commit 03aa04f

Browse files
committed
[libc] Make 'rand()' thread-safe using atomics instead of TLS
Summary: Currently, we implement the `rand` function using thread-local storage. This is somewhat problematic because not every target supports TLS, and even more do not support non-zero initializers on TLS. The C standard states that the `rand()` function need not be thread, safe. However, many implementations provide thread-safety anyway. There's some confusing language in the 'rationale' section of https://pubs.opengroup.org/onlinepubs/9699919799/functions/rand.html, but given that `glibc` uses a lock, I think we should make this thread safe as well. it mentions that threaded behavior is desirable and can be done in the two ways: 1. A single per-process sequence of pseudo-random numbers that is shared by all threads that call rand() 2. A different sequence of pseudo-random numbers for each thread that calls rand() The current implementation is (2.) and this patch moves it to (1.). This is beneficial for the GPU case and more generic support. The downside is that it's slightly slower to do these atomic operations, the fast path will be two atomic reads and an atomic write.
1 parent b9353f7 commit 03aa04f

File tree

5 files changed

+22
-42
lines changed

5 files changed

+22
-42
lines changed

libc/src/stdlib/rand.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,16 @@ namespace LIBC_NAMESPACE {
1515
// An implementation of the xorshift64star pseudo random number generator. This
1616
// is a good general purpose generator for most non-cryptographics applications.
1717
LLVM_LIBC_FUNCTION(int, rand, (void)) {
18-
unsigned long x = rand_next;
19-
x ^= x >> 12;
20-
x ^= x << 25;
21-
x ^= x >> 27;
22-
rand_next = x;
23-
return static_cast<int>((x * 0x2545F4914F6CDD1Dul) >> 32) & RAND_MAX;
18+
unsigned long orig = rand_next.load(cpp::MemoryOrder::RELAXED);
19+
for (;;) {
20+
unsigned long x = orig;
21+
x ^= x >> 12;
22+
x ^= x << 25;
23+
x ^= x >> 27;
24+
if (rand_next.compare_exchange_weak(orig, x, cpp::MemoryOrder::ACQUIRE,
25+
cpp::MemoryOrder::RELAXED))
26+
return static_cast<int>((x * 0x2545F4914F6CDD1Dul) >> 32) & RAND_MAX;
27+
}
2428
}
2529

2630
} // namespace LIBC_NAMESPACE

libc/src/stdlib/rand_util.cpp

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,13 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "src/stdlib/rand_util.h"
10+
#include "src/__support/CPP/atomic.h"
1011
#include "src/__support/macros/attributes.h"
1112

1213
namespace LIBC_NAMESPACE {
1314

14-
#ifdef LIBC_TARGET_ARCH_IS_GPU
15-
// FIXME: Local GPU memory cannot be initialized so we cannot currently provide
16-
// a standard compliant default value.
17-
ThreadLocal<unsigned long> rand_next;
18-
#else
19-
// C standard 7.10p2: If 'rand' is called before 'srand' it is to proceed as if
20-
// the 'srand' function was called with a value of '1'.
21-
LIBC_THREAD_LOCAL unsigned long rand_next = 1;
22-
#endif
15+
// C standard 7.10p2: If 'rand' is called before 'srand' it is to
16+
// proceed as if the 'srand' function was called with a value of '1'.
17+
cpp::Atomic<unsigned long> rand_next = 1;
2318

2419
} // namespace LIBC_NAMESPACE

libc/src/stdlib/rand_util.h

Lines changed: 5 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,33 +9,15 @@
99
#ifndef LLVM_LIBC_SRC_STDLIB_RAND_UTIL_H
1010
#define LLVM_LIBC_SRC_STDLIB_RAND_UTIL_H
1111

12-
#include "src/__support/GPU/utils.h"
12+
#include "src/__support/CPP/atomic.h"
1313
#include "src/__support/macros/attributes.h"
1414

1515
namespace LIBC_NAMESPACE {
1616

17-
#ifdef LIBC_TARGET_ARCH_IS_GPU
18-
// Implement thread local storage on the GPU using local memory. Each thread
19-
// gets its slot in the local memory array and is private to the group.
20-
// TODO: We need to implement the 'thread_local' keyword on the GPU. This is an
21-
// inefficient and incomplete stand-in until that is done.
22-
template <typename T> class ThreadLocal {
23-
private:
24-
static constexpr long MAX_THREADS = 1024;
25-
[[clang::loader_uninitialized]] static inline gpu::Local<T>
26-
storage[MAX_THREADS];
27-
28-
public:
29-
LIBC_INLINE operator T() const { return storage[gpu::get_thread_id()]; }
30-
LIBC_INLINE void operator=(const T &value) {
31-
storage[gpu::get_thread_id()] = value;
32-
}
33-
};
34-
35-
extern ThreadLocal<unsigned long> rand_next;
36-
#else
37-
extern LIBC_THREAD_LOCAL unsigned long rand_next;
38-
#endif
17+
// The ISO C standard does not explicitly require thread-safe behavior for the
18+
// generic `rand()` function. Some implementations expect it however, so we
19+
// provide it here.
20+
extern cpp::Atomic<unsigned long> rand_next;
3921

4022
} // namespace LIBC_NAMESPACE
4123

libc/src/stdlib/srand.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
namespace LIBC_NAMESPACE {
1414

15-
LLVM_LIBC_FUNCTION(void, srand, (unsigned int seed)) { rand_next = seed; }
15+
LLVM_LIBC_FUNCTION(void, srand, (unsigned int seed)) {
16+
rand_next.store(seed, cpp::MemoryOrder::RELAXED);
17+
}
1618

1719
} // namespace LIBC_NAMESPACE

libc/test/src/stdlib/rand_test.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,12 @@ TEST(LlvmLibcRandTest, UnsetSeed) {
2323
vals[i] = val;
2424
}
2525

26-
// FIXME: The GPU implementation cannot initialize the seed correctly.
27-
#ifndef LIBC_TARGET_ARCH_IS_GPU
2826
// The C standard specifies that if 'srand' is never called it should behave
2927
// as if 'srand' was called with a value of 1. If we seed the value with 1 we
3028
// should get the same sequence as the unseeded version.
3129
LIBC_NAMESPACE::srand(1);
3230
for (size_t i = 0; i < 1000; ++i)
3331
ASSERT_EQ(LIBC_NAMESPACE::rand(), vals[i]);
34-
#endif
3532
}
3633

3734
TEST(LlvmLibcRandTest, SetSeed) {

0 commit comments

Comments
 (0)