@@ -1926,8 +1926,9 @@ struct llama_layer {
     struct ggml_tensor * ffn_up_shexp;

     // ff bias
-    struct ggml_tensor * ffn_down_b; // b2
-    struct ggml_tensor * ffn_up_b; // b3
+    struct ggml_tensor * ffn_gate_b = nullptr;
+    struct ggml_tensor * ffn_down_b = nullptr; // b2
+    struct ggml_tensor * ffn_up_b = nullptr; // b3
     struct ggml_tensor * ffn_act;

     // mamba proj
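
Note on the struct hunk above: the in-class `= nullptr` defaults make the bias members safely optional regardless of how a llama_layer is constructed, so downstream code can treat "no bias" as a plain null check rather than tracking a separate per-layer flag. A minimal sketch of that pattern, with a hypothetical helper name that is not part of the patch:

    // Hypothetical helper (not in the patch): apply a bias only when the
    // layer actually loaded one; the nullptr default means "no bias".
    static struct ggml_tensor * add_bias_if_present(
            struct ggml_context * ctx, struct ggml_tensor * cur, struct ggml_tensor * b) {
        return b ? ggml_add(ctx, cur, b) : cur;
    }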
@@ -5062,6 +5063,11 @@ static bool llm_load_tensors(
                     layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
                     layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd});
                     layer.ffn_up   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, n_ff});
+
+                    // optional MLP bias
+                    layer.ffn_gate_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "bias", i), {n_ff}, false);
+                    layer.ffn_down_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, false);
+                    layer.ffn_up_b   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP,   "bias", i), {n_ff}, false);
                 } else {
                     layer.ffn_gate_inp = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert});

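
In the loader hunk above, the trailing `false` passed to ml.create_tensor marks each bias tensor as optional, so checkpoints that ship no FFN biases still load and the members keep their nullptr defaults. A rough sketch of the pattern, assuming the final parameter is a `required` flag that yields nullptr rather than an error when the tensor is missing:

    // Sketch of the optional-tensor pattern used above (assumed semantics:
    // required=false returns nullptr instead of failing the load).
    struct ggml_tensor * bias = ml.create_tensor(
            ctx_split, tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}, /*required=*/false);
    if (bias == nullptr) {
        // tensor absent from the model file; the layer field keeps its nullptr default
    }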
@@ -7224,9 +7230,9 @@ struct llm_build_context {
                 cb(cur, "ffn_norm", il);

                 cur = llm_build_ffn(ctx0, cur,
-                        model.layers[il].ffn_up,   NULL,
-                        model.layers[il].ffn_gate, NULL,
-                        model.layers[il].ffn_down, NULL,
+                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,
+                        model.layers[il].ffn_gate, model.layers[il].ffn_gate_b,
+                        model.layers[il].ffn_down, model.layers[il].ffn_down_b,
                         NULL,
                         LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
                 cb(cur, "ffn_out", il);
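
For context, llm_build_ffn consumes each bias by adding it right after the matching matmul whenever the pointer is non-NULL. A hedged sketch of the LLM_FFN_SILU + LLM_FFN_PAR path selected by this call, simplified from the real helper (which also handles other activation/gate modes and the cb callback):

    // up and gate projections, each with an optional bias
    struct ggml_tensor * up   = ggml_mul_mat(ctx0, model.layers[il].ffn_up,   cur);
    if (model.layers[il].ffn_up_b)   { up   = ggml_add(ctx0, up,   model.layers[il].ffn_up_b);   }
    struct ggml_tensor * gate = ggml_mul_mat(ctx0, model.layers[il].ffn_gate, cur);
    if (model.layers[il].ffn_gate_b) { gate = ggml_add(ctx0, gate, model.layers[il].ffn_gate_b); }
    // SILU on the gate, parallel (LLM_FFN_PAR) multiply, then the down projection
    cur = ggml_mul(ctx0, ggml_silu(ctx0, gate), up);
    cur = ggml_mul_mat(ctx0, model.layers[il].ffn_down, cur);
    if (model.layers[il].ffn_down_b) { cur = ggml_add(ctx0, cur, model.layers[il].ffn_down_b);   }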