Use warp_size instead of WARP_SIZE

gaugarg-nv · gaugarg-nv · commit 153bb2628fd4 · 2025-03-06T14:02:23.000+05:30
diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml/src/ggml-cuda/fattn.cu
@@ -293,7 +293,7 @@ void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst
         return;
     }
 
-    if (Q->ne[1] == 1 && Q->ne[0] % (2*WARP_SIZE) == 0) {
+    if (Q->ne[1] == 1 && Q->ne[0] % (2*warp_size) == 0) {
         if (prec == GGML_PREC_DEFAULT) {
             ggml_cuda_flash_attn_ext_vec_f16(ctx, dst);
             return;

Original file line number	Diff line number	Diff line change
`@@ -293,7 +293,7 @@ void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst`
`293`	`293`	`return;`
`294`	`294`	`}`
`295`	`295`
`296`		`- if (Q->ne[1] == 1 && Q->ne[0] % (2*WARP_SIZE) == 0) {`
	`296`	`+ if (Q->ne[1] == 1 && Q->ne[0] % (2*warp_size) == 0) {`
`297`	`297`	`if (prec == GGML_PREC_DEFAULT) {`
`298`	`298`	`ggml_cuda_flash_attn_ext_vec_f16(ctx, dst);`
`299`	`299`	`return;`