Commit c80f76a

Reapply "CUDA: fix race conditions FlashAttention kernels (ggml-org#13438)"

This reverts commit 6a199dd.

Parent: 877447d

File tree

2 files changed, +3 −0 lines changed


ggml/src/ggml-cuda/fattn-mma-f16.cuh

Lines changed: 2 additions & 0 deletions
@@ -715,6 +715,8 @@ static __device__ __forceinline__ void flash_attn_ext_f16_process_tile(
             KQ_crs += __shfl_xor_sync(0xFFFFFFFF, KQ_crs, offset, WARP_SIZE);
         }
 
+        __syncthreads();
+
         // Write back combined meta data:
 #pragma unroll
         for (int imeta = 0; imeta < nmeta; ++imeta) {
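Both hunks insert a __syncthreads() barrier to separate two phases of shared-memory use within a thread block. In this first hunk the barrier lands just before the combined meta data is written back, which suggests a write-after-read hazard: without it, warps that have finished the reduction above could start overwriting shared memory that slower warps are still reading. Below is a minimal standalone sketch of that hazard class, assuming nothing from the fattn code itself; the kernel and buffer names (combine_kernel, s_buf) are hypothetical.

// Sketch of the write-after-read hazard the added barrier guards against.
// All names are hypothetical; this is not the fattn code.
#include <cuda_runtime.h>
#include <cstdio>

__global__ void combine_kernel(const float * in, float * out) {
    __shared__ float s_buf[256];

    // Phase 1: each thread writes its own slot, then reads a neighbor's.
    s_buf[threadIdx.x] = in[blockIdx.x * blockDim.x + threadIdx.x];
    __syncthreads();
    const float neighbor = s_buf[(threadIdx.x + 1) % blockDim.x];

    // Without this barrier, phase 2 below may overwrite slots that slower
    // threads are still reading in phase 1 (write-after-read hazard).
    __syncthreads();

    // Phase 2: reuse the same shared buffer for a different purpose.
    s_buf[threadIdx.x] = neighbor * 2.0f;
    __syncthreads();
    out[blockIdx.x * blockDim.x + threadIdx.x] = s_buf[threadIdx.x];
}

int main() {
    const int n = 256;
    float *in, *out;
    cudaMallocManaged(&in,  n * sizeof(float));
    cudaMallocManaged(&out, n * sizeof(float));
    for (int i = 0; i < n; ++i) in[i] = (float) i;
    combine_kernel<<<1, n>>>(in, out);
    cudaDeviceSynchronize();
    printf("out[0] = %g\n", out[0]); // expect 2 (= in[1] * 2)
    cudaFree(in);
    cudaFree(out);
    return 0;
}

Deleting the middle barrier in this sketch is exactly the kind of defect that compute-sanitizer's racecheck tool is designed to flag.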

ggml/src/ggml-cuda/fattn-vec-f16.cuh

Lines changed: 1 addition & 0 deletions
@@ -181,6 +181,7 @@ static __global__ void flash_attn_vec_ext_f16(
     for (int j = 0; j < ncols; ++j) {
         KQ[j*D + tid] = -HALF_MAX_HALF;
     }
+    __syncthreads();
 
     half2 VKQ[ncols] = {{0.0f, 0.0f}};
 
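In this second hunk the barrier follows the initialization of KQ in shared memory, pointing at a read-after-write hazard: each thread writes its own slots, but later steps presumably read slots owned by other threads, so no thread may proceed past initialization until every write has landed. A minimal sketch of that pattern, again with hypothetical names (init_reduce_kernel, s_vals):

// Sketch of a read-after-write hazard on freshly initialized shared memory.
// Names are hypothetical; this is not the fattn-vec code.
#include <cuda_runtime.h>
#include <cstdio>
#include <cfloat>

__global__ void init_reduce_kernel(float * out) {
    __shared__ float s_vals[256];

    // Each thread initializes one slot, like KQ[j*D + tid] = -HALF_MAX_HALF.
    s_vals[threadIdx.x] = -FLT_MAX;

    // The added barrier: without it, the read below may observe a slot
    // whose owning thread has not written it yet.
    __syncthreads();

    // A later step reads a slot owned by a different thread.
    out[threadIdx.x] = s_vals[255 - threadIdx.x];
}

int main() {
    float * out;
    cudaMallocManaged(&out, 256 * sizeof(float));
    init_reduce_kernel<<<1, 256>>>(out);
    cudaDeviceSynchronize();
    printf("out[0] = %g\n", out[0]); // expect -FLT_MAX
    cudaFree(out);
    return 0;
}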
