Skip to content

Commit d4df77a

Browse files
committed
[libc] Implement 'atexit' on the GPU correctly
Summary: This function was never marked as supported because it was fundamentally broken when called with multiple threads. The patch in #83026 introduces a lock-free stack that can be used to correctly handle enqueuing callbacks from multiple threads. Although the previous interface tried to provide a consistent API, this was not feasible with the needs for a lock-free stack so I have elected to just use ifdefs. The size is fixed to whatever we use for testing, which currently amounts to about 8KiB dedicated for this thing, which isn't enough to be concerned about. Depends on #83026
1 parent 7789fb6 commit d4df77a

File tree

7 files changed

+103
-22
lines changed

7 files changed

+103
-22
lines changed

libc/docs/gpu/support.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ atol |check|
102102
atoll |check|
103103
exit |check| |check|
104104
abort |check| |check|
105+
atexit |check|
105106
labs |check|
106107
llabs |check|
107108
div |check|

libc/src/stdlib/CMakeLists.txt

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -363,20 +363,22 @@ add_entrypoint_object(
363363
libc.src.__support.OSUtil.osutil
364364
)
365365

366-
add_entrypoint_object(
367-
atexit
368-
SRCS
369-
atexit.cpp
370-
HDRS
371-
atexit.h
372-
CXX_STANDARD
373-
20 # For constinit of the atexit callback list.
374-
DEPENDS
375-
libc.src.__support.fixedvector
376-
libc.src.__support.blockstore
377-
libc.src.__support.threads.mutex
378-
libc.src.__support.CPP.new
379-
)
366+
if(NOT LIBC_TARGET_OS_IS_GPU)
367+
add_entrypoint_object(
368+
atexit
369+
SRCS
370+
atexit.cpp
371+
HDRS
372+
atexit.h
373+
CXX_STANDARD
374+
20 # For constinit of the atexit callback list.
375+
DEPENDS
376+
libc.src.__support.fixedvector
377+
libc.src.__support.blockstore
378+
libc.src.__support.threads.mutex
379+
libc.src.__support.CPP.new
380+
)
381+
endif()
380382

381383
add_entrypoint_object(
382384
exit
@@ -398,6 +400,13 @@ add_entrypoint_object(
398400
)
399401

400402
if(LIBC_TARGET_OS_IS_GPU)
403+
add_entrypoint_object(
404+
atexit
405+
ALIAS
406+
DEPENDS
407+
.${LIBC_TARGET_OS}.atexit
408+
)
409+
401410
add_entrypoint_object(
402411
malloc
403412
ALIAS

libc/src/stdlib/atexit.cpp

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,7 @@ struct AtExitUnit {
2828
constexpr AtExitUnit(AtExitCallback *c, void *p) : callback(c), payload(p) {}
2929
};
3030

31-
#if defined(LIBC_TARGET_ARCH_IS_GPU)
32-
// The GPU build cannot handle the potentially recursive definitions required by
33-
// the BlockStore class. Additionally, the liklihood that someone exceeds this
34-
// while executing on the GPU is extremely small.
35-
// FIXME: It is not generally safe to use 'atexit' on the GPU because the
36-
// mutexes simply passthrough. We will need a lock free stack.
37-
using ExitCallbackList = FixedVector<AtExitUnit, 64>;
38-
#elif defined(LIBC_COPT_PUBLIC_PACKAGING)
31+
#if defined(LIBC_COPT_PUBLIC_PACKAGING)
3932
using ExitCallbackList = cpp::ReverseOrderBlockStore<AtExitUnit, 32>;
4033
#else
4134
// BlockStore uses dynamic memory allocation. To avoid dynamic memory

libc/src/stdlib/gpu/CMakeLists.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,16 @@ add_entrypoint_object(
3030
libc.include.stdlib
3131
libc.src.__support.RPC.rpc_client
3232
)
33+
34+
add_entrypoint_object(
35+
atexit
36+
SRCS
37+
atexit.cpp
38+
HDRS
39+
../atexit.h
40+
CXX_STANDARD
41+
20 # For constinit of the atexit callback list.
42+
DEPENDS
43+
libc.include.stdlib
44+
libc.src.__support.fixedstack
45+
)

libc/src/stdlib/gpu/atexit.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
//===-- GPU implementation of atexit --------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/stdlib/atexit.h"
10+
#include "src/__support/common.h"
11+
#include "src/__support/fixedstack.h"
12+
13+
namespace LIBC_NAMESPACE {
14+
15+
namespace {
16+
17+
using AtExitCallback = void(void *);
18+
using StdCAtExitCallback = void(void);
19+
20+
// One registered exit handler: a callback taking a single `void *` argument
// together with the payload pointer that is handed to it when it is invoked.
// Both members default to nullptr, so a default-constructed unit is empty and
// can serve as the output slot for a pop from the callback stack.
struct AtExitUnit {
21+
AtExitCallback *callback = nullptr;
22+
void *payload = nullptr;
23+
constexpr AtExitUnit() = default;
24+
constexpr AtExitUnit(AtExitCallback *c, void *p) : callback(c), payload(p) {}
25+
};
26+
27+
// The GPU interface cannot use the standard implementation because it does not
28+
// support the Mutex type. Instead we use a lock free stack with a sufficiently
29+
// large size.
30+
constinit FixedStack<AtExitUnit, CALLBACK_LIST_SIZE_FOR_TESTS> exit_callbacks;
31+
32+
// Adapter that lets a no-argument callback be stored as an AtExitUnit: the
// payload is reinterpreted as a pointer to a `void(void)` function
// (StdCAtExitCallback) and that function is called.
void stdc_at_exit_func(void *payload) {
33+
reinterpret_cast<StdCAtExitCallback *>(payload)();
34+
}
35+
36+
} // namespace
37+
38+
namespace internal {
39+
40+
// Drains the global callback stack: repeatedly pops a unit from
// `exit_callbacks` and invokes its callback with its payload, stopping once
// `pop` returns false.
// NOTE(review): handlers presumably run most-recently-registered first, since
// the container is a stack -- confirm against FixedStack's pop order.
void call_exit_callbacks() {
41+
AtExitUnit unit;
42+
while (exit_callbacks.pop(unit))
43+
unit.callback(unit.payload);
44+
}
45+
46+
} // namespace internal
47+
48+
// Pushes `unit` onto the global callback stack.
// Returns 0 when the push succeeds and -1 when `push` reports failure
// (presumably because the fixed-capacity stack is full -- confirm against
// FixedStack).
static int add_atexit_unit(const AtExitUnit &unit) {
49+
if (!exit_callbacks.push(unit))
50+
return -1;
51+
return 0;
52+
}
53+
54+
// Registers `callback` to be invoked with `payload` when the exit callbacks
// are run. The third parameter is unnamed and ignored by this implementation.
// Returns 0 on success and -1 if the callback could not be recorded.
extern "C" int __cxa_atexit(AtExitCallback *callback, void *payload, void *) {
55+
return add_atexit_unit({callback, payload});
56+
}
57+
58+
// Registers a no-argument `callback`. The function pointer is stored as the
// payload of a unit whose callback is `stdc_at_exit_func`, which casts the
// payload back to a `void(void)` function and calls it.
// Returns 0 on success and -1 if the callback could not be recorded.
LLVM_LIBC_FUNCTION(int, atexit, (StdCAtExitCallback * callback)) {
59+
return add_atexit_unit(
60+
{&stdc_at_exit_func, reinterpret_cast<void *>(callback)});
61+
}
62+
63+
} // namespace LIBC_NAMESPACE

libc/startup/gpu/amdgpu/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ add_startup_object(
55
DEPENDS
66
libc.src.__support.RPC.rpc_client
77
libc.src.__support.GPU.utils
8+
libc.src.__support.CPP.new
89
libc.src.stdlib.exit
910
libc.src.stdlib.atexit
1011
COMPILE_OPTIONS

libc/startup/gpu/nvptx/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ add_startup_object(
55
DEPENDS
66
libc.src.__support.RPC.rpc_client
77
libc.src.__support.GPU.utils
8+
libc.src.__support.CPP.new
89
libc.src.stdlib.exit
910
libc.src.stdlib.atexit
1011
COMPILE_OPTIONS

0 commit comments

Comments
 (0)