move ggml_can_fuse to a common function

jeffbolznv · jeffbolznv · commit 5e13dcf75b23 · 2025-06-26T13:46:19.000-05:00
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
@@ -12,6 +12,10 @@
 #include <stdint.h>
 #include <string.h>
 
+#ifdef __cplusplus
+#include <initializer_list>
+#endif
+
 #ifdef __ARM_FEATURE_SVE
 #include <arm_sve.h>
 #endif // __ARM_FEATURE_SVE
@@ -467,9 +471,10 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
 #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
 #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
 
+#ifdef __cplusplus
 // return true if the node's results are only used by N other nodes
 // and can be fused into their calculations.
-static inline bool ggml_can_fuse_node(const struct ggml_tensor * node, int32_t N) {
+static inline bool ggml_node_has_N_uses(const struct ggml_tensor * node, int32_t N) {
     // check the use count against how many we're replacing
     if (node->use_count != N) {
         return false;
@@ -489,6 +494,40 @@ static inline bool ggml_can_fuse_node(const struct ggml_tensor * node, int32_t N
     return true;
 }
 
+// Returns true if nodes [node_idx, node_idx+ops.size()) are the sequence of ggml_ops in ops[]
+// and are fusable. Nodes are considered fusable according to this function if:
+// - all nodes except the last have only one use and are not views/outputs (see ggml_node_has_N_uses).
+// - all nodes except the last are src[0] of the following node.
+// - all nodes are the same shape.
+// TODO: Consider allowing GGML_OP_NONE nodes in between
+static inline bool ggml_can_fuse(struct ggml_cgraph * cgraph, int node_idx, std::initializer_list<enum ggml_op> ops) {
+    size_t num_ops = ops.size();
+    // reject negative indices explicitly: mixing int with size_t would otherwise wrap around
+    if (node_idx < 0 || (size_t) node_idx + num_ops > (size_t) cgraph->n_nodes) {
+        return false;
+    }
+
+    for (size_t i = 0; i < num_ops; ++i) {
+        struct ggml_tensor *node = cgraph->nodes[node_idx + i];
+        if (node->op != ops.begin()[i]) {
+            return false;
+        }
+        // only the intermediate nodes are checked for a single use; the last node is
+        // the result of the fused sequence, so its use count is not constrained here
+        if (i + 1 < num_ops && !ggml_node_has_N_uses(node, 1)) {
+            return false;
+        }
+        if (i > 0) {
+            struct ggml_tensor *prev = cgraph->nodes[node_idx + i - 1];
+            if (node->src[0] != prev || !ggml_are_same_shape(node, prev)) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -9697,32 +9697,6 @@ static bool ggml_vk_is_empty(ggml_tensor * node) {
     return ggml_is_empty(node) || node->op == GGML_OP_NONE || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE;
 }
 
-// Returns true if nodes [i, i+1] are fusable RMS_NORM + MUL.
-static bool ggml_can_fuse_rms_norm_mul(ggml_backend_vk_context * ctx, ggml_cgraph * cgraph, int i) {
-    ggml_tensor *norm = cgraph->nodes[i];
-
-    if (norm->op != GGML_OP_RMS_NORM) {
-        return false;
-    }
-
-    if (!ggml_can_fuse_node(norm, 1)) {
-        return false;
-    }
-
-    if (i + 1 >= cgraph->n_nodes) {
-        return false;
-    }
-    ggml_tensor *mul = cgraph->nodes[i + 1];
-    if (mul->op != GGML_OP_MUL || mul->src[0] != norm) {
-        return false;
-    }
-
-    // Since norm is the first operand of mul, it must be the same shape
-    GGML_ASSERT(ggml_are_same_shape(mul, norm));
-
-    return true;
-}
-
 static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     VK_LOG_DEBUG("ggml_backend_vk_graph_compute(" << cgraph->n_nodes << " nodes)");
     ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
@@ -9736,7 +9710,7 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
 
     uint64_t total_mat_mul_bytes = 0;
     for (int i = 0; i < cgraph->n_nodes; i++) {
-        if (ggml_can_fuse_rms_norm_mul(ctx, cgraph, i)) {
+        if (ggml_can_fuse(cgraph, i, { GGML_OP_RMS_NORM, GGML_OP_MUL })) {
             ctx->num_additional_fused_ops = 1;
         }
         ggml_vk_build_graph(ctx, cgraph, cgraph->nodes[i], i, nullptr, 0, true, false, false, false);
@@ -9806,7 +9780,7 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
             mul_mat_bytes += ggml_nbytes(cgraph->nodes[i]->src[0]);
         }
 
-        if (ggml_can_fuse_rms_norm_mul(ctx, cgraph, i)) {
+        if (ggml_can_fuse(cgraph, i, { GGML_OP_RMS_NORM, GGML_OP_MUL })) {
             ctx->num_additional_fused_ops = 1;
         }