Skip to content

Commit ad1dfd7

Browse files
authored
[Proxying][NFC] Move deferred cleanup logic to em_task_queue (#18851)
We previously had a scheme to defer freeing of `em_proyxing_queue`s until there were no more outstanding references to their `em_task_queues` sitting in message queues. Since the notification messages only ever contain references to `em_task_queue`s and not the `em_proxying_queue`s, move the deferred cleanup logic to the `em_task_queue` layer from the `em_proxying_queue` layer. This slightly simplifies the code and is a cleaner separation of concerns. This is NFC as far as users are concerned, and the code is moved with only two changes to internal behavior. First, we now use a trylock when culling zombies to avoid blocking when multiple threads are creating `em_task_queue`s at the same time. Second, we now enqueue zombie queues at the tail of the zombie list instead of the head because FIFO behavior seems fairer. The test for the zombie culling behavior is necessarily less precise now because it the objects with deferred cleanup are no longer directly user-visible. The test still does its job, though.
1 parent cd04e72 commit ad1dfd7

File tree

4 files changed

+138
-158
lines changed

4 files changed

+138
-158
lines changed

system/lib/pthread/em_task_queue.c

Lines changed: 93 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* found in the LICENSE file.
66
*/
77

8+
#include <assert.h>
89
#include <emscripten/threading.h>
910
#include <stdatomic.h>
1011
#include <stdlib.h>
@@ -15,7 +16,82 @@
1516

1617
#define EM_TASK_QUEUE_INITIAL_CAPACITY 128
1718

19+
// Task Queue Lifetime Management
20+
// -------------------------------
21+
//
22+
// When tasks are added to a task queue, the Worker running the target thread
23+
// receives an event that will cause it to execute the queue when it next
24+
// returns to its event loop. In some cases the queue will already have been
25+
// executed before then, but the event is still received and the queue is still
26+
// executed. These events contain references to the queue so that the target
27+
// thread will know which queue to execute.
28+
//
29+
// To avoid use-after-free bugs, we cannot free a task queue immediately when
30+
// `em_task_queue_destroy` is called; instead, we must defer freeing the queue
31+
// until all of its outstanding notifications have been processed. We defer
32+
// freeing the queue using an atomic flag. Each time a notification containing a
33+
// reference to a task queue is generated, we set the flag on that task queue.
34+
// Each time that task queue is processed, we clear the flag as long as another
35+
// notification for the queue has not been generated in the mean time. The
36+
// proxying queue can only be freed once `em_task_queue_destroy` has been called
37+
// and its notification flag has been cleared.
38+
//
39+
// But an extra complication is that the target thread may have died by the time
40+
// it gets back to its event loop to process its notifications. In that case the
41+
// thread's Worker will still receive a notification and have to clear the
42+
// notification flag without a live runtime. Without a live runtime, there is no
43+
// stack, so the worker cannot safely free the queue at this point even if the
44+
// notification flag is cleared. We need a separate thread with a live runtime
45+
// to perform the free.
46+
//
47+
// To ensure that queues are eventually freed, we place destroyed queues in a
48+
// global "zombie list" where they wait for their notification flags to be
49+
// cleared. The zombie list is scanned and zombie queues without outstanding
50+
// notifications are freed whenever a new queue is constructed. In principle the
51+
// zombie list could be scanned at any time, but the queue constructor is a nice
52+
// place to do it because scanning there is sufficient to keep the number of
53+
// zombie queues from growing without bound; creating a new zombie ultimately
54+
// requires creating a new queue.
55+
//
56+
// -------------------------------
57+
58+
// The head of the zombie list. Its mutex protects access to the list and its
59+
// other fields are not used.
60+
static em_task_queue zombie_list_head = {.mutex = PTHREAD_MUTEX_INITIALIZER,
61+
.zombie_prev = &zombie_list_head,
62+
.zombie_next = &zombie_list_head};
63+
64+
static void em_task_queue_free(em_task_queue* queue) {
65+
pthread_mutex_destroy(&queue->mutex);
66+
free(queue->tasks);
67+
free(queue);
68+
}
69+
70+
static void cull_zombies() {
71+
if (pthread_mutex_trylock(&zombie_list_head.mutex) != 0) {
72+
// Some other thread is already culling. In principle there may be new
73+
// cullable zombies after it finishes, but it's not worth waiting to find
74+
// out.
75+
return;
76+
}
77+
em_task_queue* curr = zombie_list_head.zombie_next;
78+
while (curr != &zombie_list_head) {
79+
em_task_queue* next = curr->zombie_next;
80+
if (curr->notification == NOTIFICATION_NONE) {
81+
// Remove the zombie from the list and free it.
82+
curr->zombie_prev->zombie_next = curr->zombie_next;
83+
curr->zombie_next->zombie_prev = curr->zombie_prev;
84+
em_task_queue_free(curr);
85+
}
86+
curr = next;
87+
}
88+
pthread_mutex_unlock(&zombie_list_head.mutex);
89+
}
90+
1891
em_task_queue* em_task_queue_create(pthread_t thread) {
92+
// Free any queue that has been destroyed and is safe to free.
93+
cull_zombies();
94+
1995
em_task_queue* queue = malloc(sizeof(em_task_queue));
2096
if (queue == NULL) {
2197
return NULL;
@@ -32,14 +108,27 @@ em_task_queue* em_task_queue_create(pthread_t thread) {
32108
.tasks = tasks,
33109
.capacity = EM_TASK_QUEUE_INITIAL_CAPACITY,
34110
.head = 0,
35-
.tail = 0};
111+
.tail = 0,
112+
.zombie_prev = NULL,
113+
.zombie_next = NULL};
36114
return queue;
37115
}
38116

39117
void em_task_queue_destroy(em_task_queue* queue) {
40-
pthread_mutex_destroy(&queue->mutex);
41-
free(queue->tasks);
42-
free(queue);
118+
assert(queue->zombie_next == NULL && queue->zombie_prev == NULL);
119+
if (queue->notification == NOTIFICATION_NONE) {
120+
// No outstanding references to the queue, so we can go ahead and free it.
121+
em_task_queue_free(queue);
122+
return;
123+
}
124+
// Otherwise add the queue to the zombie list so that it will eventually be
125+
// freed safely.
126+
pthread_mutex_lock(&zombie_list_head.mutex);
127+
queue->zombie_next = &zombie_list_head;
128+
queue->zombie_prev = zombie_list_head.zombie_prev;
129+
queue->zombie_next->zombie_prev = queue;
130+
queue->zombie_prev->zombie_next = queue;
131+
pthread_mutex_unlock(&zombie_list_head.mutex);
43132
}
44133

45134
// Not thread safe. Returns 1 on success and 0 on failure.

system/lib/pthread/em_task_queue.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ typedef struct task {
1717
void* arg;
1818
} task;
1919

20-
// A task queue holding tasks to be processed by a particular thread.
20+
// A task queue holding tasks to be processed by a particular thread. The only
21+
// "public" field is `notification`. All other fields should be considered
22+
// private implementation details.
2123
typedef struct em_task_queue {
2224
// Flag encoding the state of postMessage notifications for this task queue.
2325
// Accessed directly from JS, so must be the first member.
@@ -30,15 +32,18 @@ typedef struct em_task_queue {
3032
// Recursion guard. Only accessed on the target thread, so there's no need to
3133
// hold the lock when accessing it. TODO: We disallow recursive processing
3234
// because that's what the old proxying API does, so it is safer to start with
33-
// the same behavior. Experiment with relaxing this restriction once the old
34-
// API uses these queues as well.
35+
// the same behavior. Experiment with relaxing this restriction.
3536
int processing;
3637
// Ring buffer of tasks of size `capacity`. New tasks are enqueued at
3738
// `tail` and dequeued at `head`.
3839
task* tasks;
3940
int capacity;
4041
int head;
4142
int tail;
43+
// Doubly linked list pointers for the zombie list. See em_task_queue.c for
44+
// details.
45+
struct em_task_queue* zombie_prev;
46+
struct em_task_queue* zombie_next;
4247
} em_task_queue;
4348

4449
em_task_queue* em_task_queue_create(pthread_t thread);

system/lib/pthread/proxying.c

Lines changed: 18 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -15,155 +15,52 @@
1515
#include "em_task_queue.h"
1616
#include "proxying_notification_state.h"
1717

18-
// Proxy Queue Lifetime Management
19-
// -------------------------------
20-
//
21-
// Proxied tasks are executed either when the user manually calls
22-
// `emscripten_proxy_execute_queue` on the target thread or when the target
23-
// thread returns to the event loop. The queue does not know which execution
24-
// path will be used ahead of time when the work is proxied, so it must
25-
// conservatively send a message to the target thread's event loop in case the
26-
// user expects the event loop to drive the execution. These notifications
27-
// contain references to the queue that will be dereferenced when the target
28-
// thread returns to its event loop and receives the notification, even if the
29-
// user manages the execution of the queue themselves.
30-
//
31-
// To avoid use-after-free bugs, we cannot free a queue immediately when a user
32-
// calls `em_proxying_queue_destroy`; instead, we have to defer freeing the
33-
// queue until all of its outstanding notifications have been processed. We
34-
// defer freeing the queue using a reference counting scheme. Each time a
35-
// notification containing a reference to the a thread-local task queue is
36-
// generated, we set a flag on that task queue. Each time that task queue is
37-
// processed, we clear the flag. The proxying queue can only be freed once
38-
// `em_proxying_queue_destroy` has been called and the notification flags on
39-
// each of its task queues have been cleared.
40-
//
41-
// But an extra complication is that the target thread may have died by the time
42-
// it gets back to its event loop to process its notifications. This can happen
43-
// when a user proxies some work to a thread, then calls
44-
// `emscripten_proxy_execute_queue` on that thread, then destroys the queue and
45-
// exits the thread. In that situation no work will be dropped, but the thread's
46-
// worker will still receive a notification and have to clear the notification
47-
// flag without a live runtime. Without a live runtime, there is no stack, so
48-
// the worker cannot safely free the queue at this point even if the refcount
49-
// goes to zero. We need a separate thread with a live runtime to perform the
50-
// free.
51-
//
52-
// To ensure that queues are eventually freed, we place destroyed queues in a
53-
// global "zombie list" where they wait for their notification flags to be
54-
// cleared. The zombie list is scanned whenever a new queue is constructed and
55-
// any of the zombie queues without outstanding notifications are freed. In
56-
// principle the zombie list could be scanned at any time, but the queue
57-
// constructor is a nice place to do it because scanning there is sufficient to
58-
// keep the number of zombie queues from growing without bound; creating a new
59-
// zombie ultimately requires creating a new queue.
60-
//
61-
// -------------------------------
62-
6318
struct em_proxying_queue {
6419
// Protects all accesses to em_task_queues, size, and capacity.
6520
pthread_mutex_t mutex;
6621
// `size` task queue pointers stored in an array of size `capacity`.
6722
em_task_queue** task_queues;
6823
int size;
6924
int capacity;
70-
// Doubly linked list pointers for the zombie list.
71-
em_proxying_queue* zombie_prev;
72-
em_proxying_queue* zombie_next;
7325
};
7426

7527
// The system proxying queue.
76-
static em_proxying_queue system_proxying_queue = {.mutex =
77-
PTHREAD_MUTEX_INITIALIZER,
78-
.task_queues = NULL,
79-
.size = 0,
80-
.capacity = 0,
81-
.zombie_prev = NULL,
82-
.zombie_next = NULL};
28+
static em_proxying_queue system_proxying_queue = {
29+
.mutex = PTHREAD_MUTEX_INITIALIZER,
30+
.task_queues = NULL,
31+
.size = 0,
32+
.capacity = 0,
33+
};
8334

8435
em_proxying_queue* emscripten_proxy_get_system_queue(void) {
8536
return &system_proxying_queue;
8637
}
8738

88-
// The head of the zombie list. Its mutex protects access to the list and its
89-
// other fields are not used.
90-
static em_proxying_queue zombie_list_head = {.mutex = PTHREAD_MUTEX_INITIALIZER,
91-
.zombie_prev = &zombie_list_head,
92-
.zombie_next = &zombie_list_head};
93-
94-
static void em_proxying_queue_free(em_proxying_queue* q) {
95-
pthread_mutex_destroy(&q->mutex);
96-
for (int i = 0; i < q->size; i++) {
97-
em_task_queue_destroy(q->task_queues[i]);
98-
}
99-
free(q->task_queues);
100-
free(q);
101-
}
102-
103-
// Does not lock `q` because it should only be called after `q` has been
104-
// destroyed when it would be UB for new work to come in and race to generate a
105-
// new notification.
106-
static int has_notification(em_proxying_queue* q) {
107-
for (int i = 0; i < q->size; i++) {
108-
if (q->task_queues[i]->notification != NOTIFICATION_NONE) {
109-
return 1;
110-
}
111-
}
112-
return 0;
113-
}
114-
115-
static void cull_zombies() {
116-
pthread_mutex_lock(&zombie_list_head.mutex);
117-
em_proxying_queue* curr = zombie_list_head.zombie_next;
118-
while (curr != &zombie_list_head) {
119-
em_proxying_queue* next = curr->zombie_next;
120-
if (!has_notification(curr)) {
121-
// Remove the zombie from the list and free it.
122-
curr->zombie_prev->zombie_next = curr->zombie_next;
123-
curr->zombie_next->zombie_prev = curr->zombie_prev;
124-
em_proxying_queue_free(curr);
125-
}
126-
curr = next;
127-
}
128-
pthread_mutex_unlock(&zombie_list_head.mutex);
129-
}
130-
13139
em_proxying_queue* em_proxying_queue_create(void) {
132-
// Free any queue that has been destroyed and is safe to free.
133-
cull_zombies();
134-
13540
// Allocate the new queue.
13641
em_proxying_queue* q = malloc(sizeof(em_proxying_queue));
13742
if (q == NULL) {
13843
return NULL;
13944
}
140-
*q = (em_proxying_queue){.mutex = PTHREAD_MUTEX_INITIALIZER,
141-
.task_queues = NULL,
142-
.size = 0,
143-
.capacity = 0,
144-
.zombie_prev = NULL,
145-
.zombie_next = NULL};
45+
*q = (em_proxying_queue){
46+
.mutex = PTHREAD_MUTEX_INITIALIZER,
47+
.task_queues = NULL,
48+
.size = 0,
49+
.capacity = 0,
50+
};
14651
return q;
14752
}
14853

14954
void em_proxying_queue_destroy(em_proxying_queue* q) {
15055
assert(q != NULL);
15156
assert(q != &system_proxying_queue && "cannot destroy system proxying queue");
152-
assert(!q->zombie_next && !q->zombie_prev &&
153-
"double freeing em_proxying_queue!");
154-
if (!has_notification(q)) {
155-
// No outstanding references to the queue, so we can go ahead and free it.
156-
em_proxying_queue_free(q);
157-
return;
57+
58+
pthread_mutex_destroy(&q->mutex);
59+
for (int i = 0; i < q->size; i++) {
60+
em_task_queue_destroy(q->task_queues[i]);
15861
}
159-
// Otherwise add the queue to the zombie list so that it will eventually be
160-
// freed safely.
161-
pthread_mutex_lock(&zombie_list_head.mutex);
162-
q->zombie_next = zombie_list_head.zombie_next;
163-
q->zombie_prev = &zombie_list_head;
164-
q->zombie_next->zombie_prev = q;
165-
q->zombie_prev->zombie_next = q;
166-
pthread_mutex_unlock(&zombie_list_head.mutex);
62+
free(q->task_queues);
63+
free(q);
16764
}
16865

16966
// Not thread safe. Returns NULL if there are no tasks for the thread.

0 commit comments

Comments
 (0)