
Commit 4ef245a

mpt : add optional bias tensors (#5638)
Update the MPT architecture to load optional bias tensors, so that models such as PhoGPT and SEA-LION, which were pre-trained with bias terms, load and run correctly.
1 parent 973053d commit 4ef245a
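
The idea behind the patch, in short: every bias is loaded as an optional tensor (the trailing `false` passed to `ml.create_tensor` marks it as optional, so the pointer stays null when the checkpoint has no bias), and the graph-building code applies a bias only when the corresponding tensor is present. The self-contained sketch below illustrates that pattern; `Tensor`, `load_optional`, and `add_bias_if_present` are hypothetical stand-ins for illustration, not the llama.cpp API.

// Minimal sketch of the "optional bias" pattern used by this commit.
// Tensor, load_optional and add_bias_if_present are illustrative stand-ins,
// not llama.cpp functions.
#include <cstddef>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Tensor {
    std::vector<float> data;
};

// Pretend checkpoint: only some bias tensors are present.
static std::map<std::string, Tensor> g_checkpoint = {
    {"attn_qkv.weight", {{1.0f, 2.0f, 3.0f, 4.0f}}},
    {"attn_qkv.bias",   {{0.5f, -0.5f}}},            // present (e.g. a model trained with biases)
    {"ffn_up.weight",   {{1.0f, 0.0f, 0.0f, 1.0f}}},
    // "ffn_up.bias" intentionally missing (original MPT checkpoints have no biases)
};

// Analogue of ml.create_tensor(..., false): return nullptr instead of
// failing when an optional tensor is absent from the checkpoint.
static const Tensor * load_optional(const std::string & name) {
    auto it = g_checkpoint.find(name);
    return it == g_checkpoint.end() ? nullptr : &it->second;
}

// Analogue of the graph-building change: add the bias only if it was loaded.
static std::vector<float> add_bias_if_present(std::vector<float> x, const Tensor * bias) {
    if (bias) {
        for (std::size_t i = 0; i < x.size() && i < bias->data.size(); ++i) {
            x[i] += bias->data[i];
        }
    }
    return x;
}

int main() {
    const Tensor * bqkv     = load_optional("attn_qkv.bias"); // present -> non-null
    const Tensor * ffn_up_b = load_optional("ffn_up.bias");   // missing -> null

    std::vector<float> qkv_out = add_bias_if_present({1.0f, 1.0f}, bqkv);
    std::vector<float> ffn_out = add_bias_if_present({1.0f, 1.0f}, ffn_up_b);

    std::printf("qkv_out: %.1f %.1f (bias applied)\n",      qkv_out[0], qkv_out[1]);
    std::printf("ffn_out: %.1f %.1f (no bias, unchanged)\n", ffn_out[0], ffn_out[1]);
    return 0;
}

Checkpoints without bias entries take the null branch and behave exactly as before; checkpoints that ship biases get the extra add, which is what the diff below wires up for MPT.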

1 file changed: +26 -10 lines changed

llama.cpp

Lines changed: 26 additions & 10 deletions
@@ -4054,6 +4054,8 @@ static bool llm_load_tensors(
     // output
     {
         model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
+        model.output_norm_b = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "bias"), {n_embd}, false);
+
         model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab});
     }

@@ -4063,14 +4065,23 @@ static bool llm_load_tensors(

     auto & layer = model.layers[i];

-    layer.attn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd});
+    layer.attn_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd});
+    layer.attn_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "bias", i),   {n_embd}, false);

     layer.wqkv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa});
+    layer.bqkv = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_QKV, "bias", i),   {n_embd + 2*n_embd_gqa}, false);
+
     layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd});
+    layer.bo = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT, "bias", i),   {n_embd}, false);

-    layer.ffn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd});
-    layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd});
-    layer.ffn_up   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
+    layer.ffn_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd});
+    layer.ffn_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "bias", i),   {n_embd}, false);
+
+    layer.ffn_down   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd});
+    layer.ffn_down_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_DOWN, "bias", i),   {n_embd}, false);
+
+    layer.ffn_up   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
+    layer.ffn_up_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_UP, "bias", i),   {n_ff}, false);

     // AWQ ScaleActivation layer
     layer.ffn_act = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_ACT, "scales", i), {n_ff}, false);
@@ -6171,7 +6182,7 @@ struct llm_build_context {

     attn_norm = llm_build_norm(ctx0, inpL, hparams,
             model.layers[il].attn_norm,
-            NULL,
+            model.layers[il].attn_norm_b,
             LLM_NORM, cb, il);
     cb(attn_norm, "attn_norm", il);

@@ -6181,6 +6192,11 @@ struct llm_build_context {

     cur = ggml_mul_mat(ctx0, model.layers[il].wqkv, cur);
     cb(cur, "wqkv", il);
+
+    if (model.layers[il].bqkv){
+        cur = ggml_add(ctx0, cur, model.layers[il].bqkv);
+        cb(cur, "bqkv", il);
+    }

     if (hparams.f_clamp_kqv > 0.0f) {
         cur = ggml_clamp(ctx0, cur, -hparams.f_clamp_kqv, hparams.f_clamp_kqv);
@@ -6198,7 +6214,7 @@ struct llm_build_context {
     Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);

     cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
-            model.layers[il].wo, NULL,
+            model.layers[il].wo, model.layers[il].bo,
             Kcur, Vcur, Qcur, KQ_mask, KQ_pos, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il);
     cb(cur, "kqv_out", il);
 }
@@ -6211,13 +6227,13 @@ struct llm_build_context {
 {
     cur = llm_build_norm(ctx0, ffn_inp, hparams,
             model.layers[il].ffn_norm,
-            NULL,
+            model.layers[il].ffn_norm_b,
             LLM_NORM, cb, il);
     cb(cur, "ffn_norm", il);
     cur = llm_build_ffn(ctx0, cur,
-            model.layers[il].ffn_up, NULL,
+            model.layers[il].ffn_up, model.layers[il].ffn_up_b,
             NULL, NULL,
-            model.layers[il].ffn_down, NULL,
+            model.layers[il].ffn_down, model.layers[il].ffn_down_b,
             model.layers[il].ffn_act,
             LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
     cb(cur, "ffn_out", il);
@@ -6234,7 +6250,7 @@ struct llm_build_context {

     cur = llm_build_norm(ctx0, cur, hparams,
             model.output_norm,
-            NULL,
+            model.output_norm_b,
             LLM_NORM, cb, -1);
     cb(cur, "result_norm", -1);
