@@ -3198,16 +3198,27 @@ static void ggml_barrier(struct ggml_threadpool * threadpool) {
3198
3198
3199
3199
int passed_old = atomic_load_explicit(n_barrier_passed, memory_order_relaxed);
3200
3200
3201
- if (atomic_fetch_add(n_barrier, 1) == n_threads - 1) {
3201
+ // All threads go through the full fence (memory barrier) operation once to ensure
3202
+ // that all previous updates have completed.
3203
+ // The rest of the reads and writes can be relaxed, but the thread sanitizer wants
3204
+ // to see an explicit acquire / release sequence to declare all further accesses
3205
+ // as safe.
3206
+
3207
+ #if defined(__has_feature) && __has_feature(thread_sanitizer)
3208
+ #define passed_acquire memory_order_acquire
3209
+ #define passed_release memory_order_release
3210
+ #else
3211
+ #define passed_acquire memory_order_relaxed
3212
+ #define passed_release memory_order_relaxed
3213
+ #endif
3214
+
3215
+ if (atomic_fetch_add_explicit(n_barrier, 1, memory_order_seq_cst) == n_threads - 1) {
3202
3216
// last thread
3203
- atomic_store (n_barrier, 0);
3204
- atomic_fetch_add_explicit(n_barrier_passed, 1, memory_order_relaxed );
3217
+ atomic_store_explicit (n_barrier, 0, memory_order_relaxed );
3218
+ atomic_fetch_add_explicit(n_barrier_passed, 1, passed_release );
3205
3219
} else {
3206
3220
// wait for other threads
3207
- while (true) {
3208
- if (atomic_load_explicit(n_barrier_passed, memory_order_relaxed) != passed_old) {
3209
- return;
3210
- }
3221
+ while (atomic_load_explicit(n_barrier_passed, passed_acquire) == passed_old) {
3211
3222
ggml_thread_cpu_relax();
3212
3223
}
3213
3224
}
0 commit comments