Skip to content

Commit 71af4ab

Browse files
threadpool: further simplify and improve ggml_barrier
Avoid using strict memory order, yet make sure that all threads go through a full memory barrier (memory fence) on barrier entrance and exit. We no longer need thread-sanitizer hacks.
1 parent 09896b1 commit 71af4ab

File tree

1 file changed

+15
-34
lines changed

1 file changed

+15
-34
lines changed

ggml/src/ggml.c

Lines changed: 15 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3177,55 +3177,36 @@ inline static void ggml_critical_section_start(void) {
31773177
}
31783178
}
31793179

// Synchronize all active threadpool workers at a rendezvous point.
//
// Fast path: when only one thread is currently active, return immediately.
// With GGML_USE_OPENMP the compiler-provided `#pragma omp barrier` is used.
// Otherwise a counter-based spin barrier is built from C11 atomics: each
// thread performs exactly one seq-cst read-modify-write on entry and one on
// exit, so every thread passes through a full memory fence (memory barrier)
// on both sides of the barrier while all remaining accesses stay relaxed
// (per the commit message, this removes the need for thread-sanitizer hacks).
static void ggml_barrier(struct ggml_threadpool * tp) {
    int n_threads = atomic_load_explicit(&tp->n_threads_cur, memory_order_relaxed);
    if (n_threads == 1) {
        return;
    }

#ifdef GGML_USE_OPENMP
    #pragma omp barrier
#else
    // snapshot the pass counter; waiters spin until it changes
    int n_passed = atomic_load_explicit(&tp->n_barrier_passed, memory_order_relaxed);

    // enter barrier (full seq-cst fence)
    int n_barrier = atomic_fetch_add_explicit(&tp->n_barrier, 1, memory_order_seq_cst);

    int last = 0;
    if (n_barrier == (n_threads - 1)) {
        // last thread to arrive: reset the entry counter for the next barrier
        atomic_store_explicit(&tp->n_barrier, 0, memory_order_relaxed);
        last = 1;
    } else {
        // wait for other threads
        while (atomic_load_explicit(&tp->n_barrier_passed, memory_order_relaxed) == n_passed) {
            ggml_thread_cpu_relax();
        }
    }

    // exit barrier (full seq-cst fence)
    // only the last thread adds 1, bumping the pass counter and releasing the
    // waiters; the others add 0, but the seq-cst RMW still acts as a full
    // fence for every thread on the way out
    atomic_fetch_add_explicit(&tp->n_barrier_passed, last, memory_order_seq_cst);
#endif
}
32293210

32303211
// TODO: make this somehow automatically executed
32313212
// some sort of "sentry" mechanism

0 commit comments

Comments
 (0)