We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b775345, commit ffd0eae (Copy full SHA for ffd0eae)
ggml/src/ggml-cuda/fattn-vec-f16.cuh
@@ -212,6 +212,7 @@ static __global__ void flash_attn_vec_ext_f16(
212
}
213
214
if (__all_sync(0xFFFFFFFF, skip)) {
215
+ __syncthreads();
216
continue;
217
218
#endif // GGML_USE_HIP
ggml/src/ggml-cuda/fattn-vec-f32.cuh
@@ -217,6 +217,7 @@ static __global__ void flash_attn_vec_ext_f32(
219
220
221
222
223
0 commit comments