Skip to content

Commit 120f7bf

Browse files
sroecker authored and giuseppe committed
Add optional MLP bias for Granite models
Add optional MLP bias for ARCH_LLAMA to support Granite models. Partially addresses issue #7116. Still needs some more changes to properly support Granite.
1 parent d041d2c commit 120f7bf

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

llama.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1953,8 +1953,9 @@ struct llama_layer {
19531953
struct ggml_tensor * ffn_up_shexp;
19541954

19551955
// ff bias
1956-
struct ggml_tensor * ffn_down_b; // b2
1957-
struct ggml_tensor * ffn_up_b; // b3
1956+
struct ggml_tensor * ffn_gate_b = nullptr;
1957+
struct ggml_tensor * ffn_down_b = nullptr; // b2
1958+
struct ggml_tensor * ffn_up_b = nullptr; // b3
19581959
struct ggml_tensor * ffn_act;
19591960

19601961
// mamba proj
@@ -5103,6 +5104,11 @@ static bool llm_load_tensors(
51035104
layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
51045105
layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd});
51055106
layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
5107+
5108+
// optional MLP bias
5109+
layer.ffn_gate_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "bias", i), {n_ff}, llama_model_loader::TENSOR_NOT_REQUIRED);
5110+
layer.ffn_down_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
5111+
layer.ffn_up_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}, llama_model_loader::TENSOR_NOT_REQUIRED);
51065112
} else {
51075113
layer.ffn_gate_inp = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert});
51085114

@@ -7305,9 +7311,9 @@ struct llm_build_context {
73057311
cb(cur, "ffn_norm", il);
73067312

73077313
cur = llm_build_ffn(ctx0, cur,
7308-
model.layers[il].ffn_up, NULL,
7309-
model.layers[il].ffn_gate, NULL,
7310-
model.layers[il].ffn_down, NULL,
7314+
model.layers[il].ffn_up, model.layers[il].ffn_up_b,
7315+
model.layers[il].ffn_gate, model.layers[il].ffn_gate_b,
7316+
model.layers[il].ffn_down, model.layers[il].ffn_down_b,
73117317
NULL,
73127318
LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
73137319
cb(cur, "ffn_out", il);

0 commit comments

Comments (0)