Commit 91a8ee6 (1 parent: 056eb74)
src/llama-graph.cpp

@@ -659,6 +659,28 @@ ggml_tensor * llm_graph_context::build_ffn(
                 cur = ggml_mul(ctx0, x0, x1);
                 cb(cur, "ffn_mul", il);
             } break;
+        case LLM_FFN_GEGLU:
+            {
+                // Split into two equal parts
+                int64_t split_point = cur->ne[0] / 2;
+                ggml_tensor * output_ffn_up = ggml_cont(ctx0, ggml_view_2d(
+                    ctx0, cur, split_point,
+                    cur->ne[1], cur->nb[1], 0
+                ));
+                ggml_tensor * output_ffn_gate = ggml_cont(ctx0, ggml_view_2d(
+                    ctx0, cur, split_point,
+                    cur->ne[1], cur->nb[1],
+                    split_point * ggml_element_size(cur)
+                ));
+
+                // Apply GELU activation function to the first part
+                output_ffn_up = ggml_gelu(ctx0, output_ffn_up);
+                cb(output_ffn_up, "ffn_gelu", il);
+
+                // Element-wise multiplication between the activated part and the gate part
+                cur = ggml_mul(ctx0, output_ffn_up, output_ffn_gate);
+                cb(cur, "ffn_geglu", il);
+            } break;
     }

     if (gate && type_gate == LLM_FFN_PAR) {
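For context, the GEGLU path added above computes GELU(up_half) ⊙ gate_half over a row that the up projection has made twice as wide: the first half is activated, the second half gates it. Below is a minimal standalone sketch of that split-activate-multiply pattern on plain float arrays, independent of ggml. The tanh-based GELU approximation and the helper names `gelu`/`geglu` are illustrative assumptions for this sketch, not part of the commit.

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Tanh approximation of GELU (an assumption here; ggml's exact
// constants and evaluation strategy may differ).
static float gelu(float x) {
    const float c = 0.7978845608f; // sqrt(2/pi)
    return 0.5f * x * (1.0f + std::tanh(c * (x + 0.044715f * x * x * x)));
}

// GEGLU over one row of 2*n values from the up projection:
// out[i] = GELU(row[i]) * row[n + i], mirroring the diff's split at
// split_point = ne[0] / 2 followed by ggml_gelu and ggml_mul.
static std::vector<float> geglu(const std::vector<float> & row) {
    const size_t n = row.size() / 2; // split into two equal parts
    std::vector<float> out(n);
    for (size_t i = 0; i < n; ++i) {
        out[i] = gelu(row[i]) * row[n + i];
    }
    return out;
}

int main() {
    // One row with n = 2: {up_0, up_1, gate_0, gate_1}
    std::vector<float> row = {1.0f, -2.0f, 0.5f, 3.0f};
    for (float v : geglu(row)) {
        std::printf("%f\n", v);
    }
    return 0;
}
```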
src/llama-graph.h

@@ -36,6 +36,7 @@ enum llm_ffn_op_type {
     LLM_FFN_RELU,
     LLM_FFN_RELU_SQR,
     LLM_FFN_SWIGLU,
+    LLM_FFN_GEGLU,
 };

 enum llm_ffn_gate_type {