Skip to content

Commit 970880c

Browse files
committed
[libc] Partially implement 'rand' for the GPU
Summary: This patch partially implements the `rand` function on the GPU. It is partial because the GPU currently doesn't support thread local storage or static initializers. To implement this on the GPU, I use 1/8th of the local / shared memory quota to treat the shared memory as thread local storage. This is done by simply allocating enough storage for each thread in the block and indexing into it based on the thread id. The downside is that this does not initialize the seed to `1`, as the standard requires when `srand` has not been called; it is also wasteful. In the future we should figure out a way to support TLS on the GPU so that this can be completely common and less resource intensive.
1 parent 75e6480 commit 970880c

File tree

5 files changed

+39
-4
lines changed

5 files changed

+39
-4
lines changed

libc/config/gpu/entrypoints.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ set(TARGET_LIBC_ENTRYPOINTS
6363
libc.src.stdlib.lldiv
6464
libc.src.stdlib.qsort
6565
libc.src.stdlib.qsort_r
66+
libc.src.stdlib.rand
67+
libc.src.stdlib.srand
6668
libc.src.stdlib.strtod
6769
libc.src.stdlib.strtof
6870
libc.src.stdlib.strtol

libc/src/stdlib/rand.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,12 @@ namespace LIBC_NAMESPACE {
1515
// An implementation of the xorshift64star pseudo random number generator. This
1616
// is a good general purpose generator for most non-cryptographic applications.
1717
LLVM_LIBC_FUNCTION(int, rand, (void)) {
18-
rand_next ^= rand_next >> 12;
19-
rand_next ^= rand_next << 25;
20-
rand_next ^= rand_next >> 27;
21-
return static_cast<int>((rand_next * 0x2545F4914F6CDD1Dul) >> 32) & RAND_MAX;
18+
unsigned long x = rand_next;
19+
x ^= x >> 12;
20+
x ^= x << 25;
21+
x ^= x >> 27;
22+
rand_next = x;
23+
return static_cast<int>((x * 0x2545F4914F6CDD1Dul) >> 32) & RAND_MAX;
2224
}
2325

2426
} // namespace LIBC_NAMESPACE

libc/src/stdlib/rand_util.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,14 @@
1111

1212
namespace LIBC_NAMESPACE {
1313

14+
#ifdef LIBC_TARGET_ARCH_IS_GPU
15+
// FIXME: Local GPU memory cannot be initialized so we cannot currently provide
16+
// a standard compliant default value.
17+
ThreadLocal<unsigned long> rand_next;
18+
#else
1419
// C standard 7.10p2: If 'rand' is called before 'srand' it is to proceed as if
1520
// the 'srand' function was called with a value of '1'.
1621
LIBC_THREAD_LOCAL unsigned long rand_next = 1;
22+
#endif
1723

1824
} // namespace LIBC_NAMESPACE

libc/src/stdlib/rand_util.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,33 @@
99
#ifndef LLVM_LIBC_SRC_STDLIB_RAND_UTIL_H
1010
#define LLVM_LIBC_SRC_STDLIB_RAND_UTIL_H
1111

12+
#include "src/__support/GPU/utils.h"
1213
#include "src/__support/macros/attributes.h"
1314

1415
namespace LIBC_NAMESPACE {
1516

17+
#ifdef LIBC_TARGET_ARCH_IS_GPU
18+
// Implement thread local storage on the GPU using local memory. Each thread
19+
// gets its own slot in the local memory array, which is private to the group.
20+
// TODO: We need to implement the 'thread_local' keyword on the GPU. This is an
21+
// inefficient and incomplete stand-in until that is done.
22+
template <typename T> class ThreadLocal {
23+
private:
24+
static constexpr long MAX_THREADS = 1024;
25+
[[clang::loader_uninitialized]] static inline gpu::Local<T>
26+
storage[MAX_THREADS];
27+
28+
public:
29+
LIBC_INLINE operator T() const { return storage[gpu::get_thread_id()]; }
30+
LIBC_INLINE void operator=(const T &value) {
31+
storage[gpu::get_thread_id()] = value;
32+
}
33+
};
34+
35+
extern ThreadLocal<unsigned long> rand_next;
36+
#else
1637
extern LIBC_THREAD_LOCAL unsigned long rand_next;
38+
#endif
1739

1840
} // namespace LIBC_NAMESPACE
1941

libc/test/src/stdlib/rand_test.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,15 @@ TEST(LlvmLibcRandTest, UnsetSeed) {
2323
vals[i] = val;
2424
}
2525

26+
// FIXME: The GPU implementation cannot initialize the seed correctly.
27+
#ifndef LIBC_TARGET_ARCH_IS_GPU
2628
// The C standard specifies that if 'srand' is never called it should behave
2729
// as if 'srand' was called with a value of 1. If we seed the value with 1 we
2830
// should get the same sequence as the unseeded version.
2931
LIBC_NAMESPACE::srand(1);
3032
for (size_t i = 0; i < 1000; ++i)
3133
ASSERT_EQ(LIBC_NAMESPACE::rand(), vals[i]);
34+
#endif
3235
}
3336

3437
TEST(LlvmLibcRandTest, SetSeed) {

0 commit comments

Comments
 (0)