@@ -76,7 +76,11 @@ int ggml_sve_cnt_b = 0;
76
76
#if __has_feature(thread_sanitizer)
77
77
#define GGML_TSAN_ENABLED 1
78
78
#endif
79
+ #else // __has_feature
80
+ #if defined(__SANITIZE_THREAD__)
81
+ #define GGML_TSAN_ENABLED 1
79
82
#endif
83
+ #endif // __has_feature
80
84
81
85
#if defined(_WIN32)
82
86
@@ -3216,20 +3220,24 @@ static void ggml_barrier(struct ggml_threadpool * tp) {
3216
3220
if (n_barrier == (n_threads - 1)) {
3217
3221
// last thread
3218
3222
atomic_store_explicit(&tp->n_barrier, 0, memory_order_relaxed);
3223
+
3224
+ // exit barrier (full seq-cst fence)
3219
3225
atomic_fetch_add_explicit(&tp->n_barrier_passed, 1, memory_order_seq_cst);
3220
- } else {
3221
- // wait for other threads
3222
- while (atomic_load_explicit(&tp->n_barrier_passed, memory_order_relaxed) == n_passed) {
3223
- ggml_thread_cpu_relax();
3224
- }
3226
+ return;
3227
+ }
3225
3228
3226
- #ifdef GGML_TSAN_ENABLED
3227
- // TSAN doesn't support standalone fence yet, we use a dummy read-modify-write instead
3228
- atomic_fetch_add_explicit(&tp->n_barrier_passed, 0, memory_order_seq_cst);
3229
- #else
3230
- atomic_thread_fence(memory_order_seq_cst);
3231
- #endif
3229
+ // wait for other threads
3230
+ while (atomic_load_explicit(&tp->n_barrier_passed, memory_order_relaxed) == n_passed) {
3231
+ ggml_thread_cpu_relax();
3232
3232
}
3233
+
3234
+ // exit barrier (full seq-cst fence)
3235
+ // TSAN doesn't support standalone fence yet, we use a dummy read-modify-write instead
3236
+ #ifdef GGML_TSAN_ENABLED
3237
+ atomic_fetch_add_explicit(&tp->n_barrier_passed, 0, memory_order_seq_cst);
3238
+ #else
3239
+ atomic_thread_fence(memory_order_seq_cst);
3240
+ #endif
3233
3241
#endif
3234
3242
}
3235
3243
@@ -20260,8 +20268,8 @@ static inline bool ggml_graph_compute_thread_ready(struct ggml_compute_state * s
20260
20268
20261
20269
// sync thread state after polling
20262
20270
static inline void ggml_graph_compute_thread_sync(struct ggml_compute_state * state) {
20263
- #ifdef GGML_TSAN_ENABLED
20264
20271
// TSAN doesn't support standalone fence yet, we use a dummy read-modify-write instead
20272
+ #ifdef GGML_TSAN_ENABLED
20265
20273
atomic_fetch_add_explicit(&state->threadpool->n_graph, 0, memory_order_seq_cst);
20266
20274
#else
20267
20275
atomic_thread_fence(memory_order_seq_cst);
0 commit comments