
Commit a40156a

Joan Martinez committed
fix: use other tensors
1 parent 747d17a commit a40156a

3 files changed: +9, -19 lines changed

gguf-py/gguf/constants.py

Lines changed: 0 additions & 1 deletion
@@ -370,7 +370,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.ATTN_OUT,
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.FFN_DOWN,
-        MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.LAYER_OUT_NORM,
     ],
     MODEL_ARCH.MPT: [

gguf-py/gguf/tensor_mapping.py

Lines changed: 1 addition & 1 deletion
@@ -228,6 +228,7 @@ class TensorNameMap:
             "model.layers.{bid}.feed_forward.w3", # internlm2
             "encoder.layers.{bid}.mlp.fc11", # nomic-bert
             "model.layers.{bid}.mlp.c_fc", # starcoder2
+            "encoder.layer.{bid}.mlp.gated_layers", # jina-bert
         ),

         MODEL_TENSOR.FFN_UP_EXP: (

@@ -248,7 +249,6 @@ class TensorNameMap:
             "model.layers.layers.{bid}.mlp.gate_proj", # plamo
             "model.layers.{bid}.feed_forward.w1", # internlm2
             "encoder.layers.{bid}.mlp.fc12", # nomic-bert
-            "encoder.layer.{bid}.mlp.gated_layers", # jina-bert
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (
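
With this remapping, a checkpoint tensor such as encoder.layer.7.mlp.gated_layers now resolves to the generic FFN_UP slot instead of FFN_GATE. A minimal sketch of that kind of per-block pattern lookup, assuming a table like the one edited above (this is not the real gguf-py TensorNameMap, and the blk.{bid}.ffn_up target name is used only for illustration):

    # Hypothetical pattern table modelled on the hunk above; not the real TensorNameMap.
    FFN_UP_PATTERNS = (
        "model.layers.{bid}.mlp.c_fc",           # starcoder2
        "encoder.layer.{bid}.mlp.gated_layers",  # jina-bert (moved here by this commit)
    )

    def resolve_ffn_up(name: str, n_blocks: int) -> str | None:
        # Expand {bid} for every block and look for an exact match.
        for bid in range(n_blocks):
            for pattern in FFN_UP_PATTERNS:
                if name == pattern.format(bid=bid):
                    return f"blk.{bid}.ffn_up"   # illustrative target name
        return None

    print(resolve_ffn_up("encoder.layer.7.mlp.gated_layers", n_blocks=12))  # -> blk.7.ffn_up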

llama.cpp

Lines changed: 8 additions & 17 deletions
@@ -1922,16 +1922,6 @@ struct llama_layer {
     // mamba bias
     struct ggml_tensor * ssm_conv1d_b;
     struct ggml_tensor * ssm_dt_b;
-
-    //glu mlp (jina-bert)
-    struct ggml_tensor * mlp_gated_layer_w;
-
-    struct ggml_tensor * mlp_wo_w;
-    struct ggml_tensor * mlp_wo_b;
-
-    struct ggml_tensor * mlp_norm_w;
-    struct ggml_tensor * mlp_norm_b;
-
 };

 struct llama_kv_cell {
@@ -4904,13 +4894,13 @@ static bool llm_load_tensors(
                     layer.attn_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "bias", i), {n_embd});

                     // TODO: HANDLE ALL THE MLP
-                    layer.mlp_gated_layer_w = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, 2 * n_ff});
+                    layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, 2 * n_ff});

-                    layer.mlp_wo_w = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd});
-                    layer.mlp_wo_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd});
+                    layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd});
+                    layer.ffn_down_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd});

-                    layer.mlp_norm_w = ml.create_tensor(ctx_split, tn(LLM_TENSOR_LAYER_OUT_NORM, "weight", i), {n_embd});
-                    layer.mlp_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_OUT_NORM, "bias", i), {n_embd});
+                    layer.layer_out_norm = ml.create_tensor(ctx_split, tn(LLM_TENSOR_LAYER_OUT_NORM, "weight", i), {n_embd});
+                    layer.layer_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_OUT_NORM, "bias", i), {n_embd});
                 }
             } break;
         case LLM_ARCH_BLOOM:
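
The detail worth noting in this hunk is the shape: the jina-bert checkpoint stores its GLU MLP input projection as one fused matrix, and it is now loaded under the standard FFN_UP name with doubled width, {n_embd, 2 * n_ff}. A small sketch with made-up sizes; the idea that the two halves are stacked "up" and "gate" projections is an assumption on my part, not something this hunk states:

    import numpy as np

    n_embd, n_ff = 8, 32                              # made-up sizes

    # Fused GLU input projection, mirroring the {n_embd, 2 * n_ff} shape above.
    gated_layers = np.random.randn(2 * n_ff, n_embd)

    # Assumption: the halves are "up" and "gate" stacked along the output dimension.
    w_up, w_gate = np.split(gated_layers, 2, axis=0)
    print(w_up.shape, w_gate.shape)                   # (32, 8) (32, 8)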
@@ -7564,7 +7554,7 @@ struct llm_build_context {
         struct ggml_tensor * Vcur;

         // self-attention
-        if (model.arch == LLM_ARCH_BERT) {
+        if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_JINA_BERT) {
             Qcur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].wq, cur), model.layers[il].bq);
             cb(Qcur, "Qcur", il);
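
Only the architecture check changes here, but the guarded line is worth unpacking: ggml_add(ggml_mul_mat(wq, cur), bq) is a linear projection with an explicit bias, which BERT-style checkpoints (and now jina-bert) carry for Q/K/V. A throwaway NumPy equivalent with made-up sizes:

    import numpy as np

    n_embd, n_tokens = 8, 4                  # made-up sizes
    wq  = np.random.randn(n_embd, n_embd)    # stands in for model.layers[il].wq
    bq  = np.random.randn(n_embd)            # stands in for model.layers[il].bq
    cur = np.random.randn(n_tokens, n_embd)

    # Rough equivalent of ggml_add(ggml_mul_mat(wq, cur), bq).
    Qcur = cur @ wq.T + bq
    print(Qcur.shape)                        # (4, 8)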
@@ -7654,7 +7644,7 @@ struct llm_build_context {
         cb(ffn_inp, "ffn_inp", il);

         // feed-forward network
-        if (model.arch == LLM_ARCH_BERT) {
+        if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_JINA_BERT) {
             cur = llm_build_ffn(ctx0, cur,
                     model.layers[il].ffn_up, model.layers[il].ffn_up_b,
                     NULL, NULL,
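
This routes jina-bert through the same llm_build_ffn call BERT uses; the NULL, NULL arguments sit where a separate gate weight and bias would otherwise be passed. A plain sketch of that ungated up/activation/down structure, with GELU assumed (typical for BERT-style models; the actual activation argument falls outside the lines shown) and made-up sizes:

    import numpy as np

    def gelu(t):  # tanh approximation of GELU
        return 0.5 * t * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (t + 0.044715 * t**3)))

    n_embd, n_ff, n_tokens = 8, 32, 4
    w_up,   b_up   = np.random.randn(n_ff, n_embd), np.zeros(n_ff)
    w_down, b_down = np.random.randn(n_embd, n_ff), np.zeros(n_embd)
    x = np.random.randn(n_tokens, n_embd)

    # Ungated path: up-projection + bias, activation, down-projection + bias.
    h   = gelu(x @ w_up.T + b_up)
    out = h @ w_down.T + b_down
    print(out.shape)                         # (4, 8)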
@@ -7677,6 +7667,7 @@ struct llm_build_context {
         // output layer norm
         cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].layer_out_norm, model.layers[il].layer_out_norm_b, LLM_NORM, cb, il);

+
         // input for next layer
         inpL = cur;
     }
