Skip to content

Commit ff0d3f6

Browse files
CUDA: fix Volta FlashAttention logic
1 parent d92cb67 commit ff0d3f6

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml/src/ggml-cuda/fattn.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst
235235
return;
236236
}
237237

238-
if (!new_mma_available(cc)) {
238+
if (!fp16_mma_available(cc)) {
239239
if (prec == GGML_PREC_DEFAULT) {
240240
if (Q->ne[1] <= 8) {
241241
ggml_cuda_flash_attn_ext_vec_f16(ctx, dst);

0 commit comments

Comments (0)