
Commit c3f4b1f

feat: rename Jina Bert to Jina Bert V2

Author: Joan Martinez (committed)
1 parent dfa0676 commit c3f4b1f

File tree

4 files changed: +22 −22 lines changed


convert-hf-to-gguf.py

Lines changed: 2 additions & 2 deletions

@@ -2718,8 +2718,8 @@ def write_tensors(self):
 
 
 @Model.register("JinaBertModel", "JinaBertForMaskedLM")
-class JinaBertModel(BertModel):
-    model_arch = gguf.MODEL_ARCH.JINA_BERT
+class JinaBertV2Model(BertModel):
+    model_arch = gguf.MODEL_ARCH.JINA_BERT_V2
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
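
For context on this file: convert-hf-to-gguf.py chooses a converter class from the architectures field of the Hugging Face config via the Model.register decorator, and the class attribute model_arch determines which GGUF architecture the output file is tagged with. The Hugging Face names ("JinaBertModel", "JinaBertForMaskedLM") are unchanged; only the GGUF-side architecture is renamed. Below is a minimal, self-contained Python sketch of that registry pattern, not the script's actual implementation:

# Minimal sketch of the converter-registry pattern. The MODEL_ARCH values and
# HF class names follow the diff above; everything else is illustrative.
from enum import IntEnum, auto

class MODEL_ARCH(IntEnum):
    BERT = auto()
    JINA_BERT_V2 = auto()

_registry: dict[str, type] = {}

def register(*hf_names: str):
    # Map one or more Hugging Face `architectures` strings to a converter class.
    def wrapper(cls: type) -> type:
        for name in hf_names:
            _registry[name] = cls
        return cls
    return wrapper

@register("JinaBertModel", "JinaBertForMaskedLM")
class JinaBertV2Model:
    model_arch = MODEL_ARCH.JINA_BERT_V2

# The unchanged HF name still resolves to the converter class; only the
# GGUF architecture it writes out is now JINA_BERT_V2.
assert _registry["JinaBertModel"].model_arch == MODEL_ARCH.JINA_BERT_V2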

gguf-py/gguf/constants.py

Lines changed: 3 additions & 3 deletions

@@ -118,7 +118,7 @@ class MODEL_ARCH(IntEnum):
     REFACT = auto()
     BERT = auto()
     NOMIC_BERT = auto()
-    JINA_BERT = auto()
+    JINA_BERT_V2 = auto()
     BLOOM = auto()
     STABLELM = auto()
     QWEN = auto()
@@ -195,7 +195,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.REFACT: "refact",
     MODEL_ARCH.BERT: "bert",
     MODEL_ARCH.NOMIC_BERT: "nomic-bert",
-    MODEL_ARCH.JINA_BERT: "jina-bert",
+    MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
     MODEL_ARCH.BLOOM: "bloom",
     MODEL_ARCH.STABLELM: "stablelm",
     MODEL_ARCH.QWEN: "qwen",
@@ -380,7 +380,7 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.LAYER_OUT_NORM,
     ],
-    MODEL_ARCH.JINA_BERT: [
+    MODEL_ARCH.JINA_BERT_V2: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.TOKEN_EMBD_NORM,
         MODEL_TENSOR.TOKEN_TYPES,
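
An architecture in gguf-py is declared in three parallel places that must stay in sync: the MODEL_ARCH enum member, its entry in MODEL_ARCH_NAMES (the string stored as general.architecture in converted files), and its tensor list in MODEL_TENSORS; this commit renames all three. A quick consistency check, assuming the in-tree gguf-py package is importable (a hedged sketch, not part of the commit):

# Hedged sketch: confirm the renamed architecture is wired through gguf-py.
# Assumes gguf-py from this tree is on the Python path.
from gguf.constants import MODEL_ARCH, MODEL_ARCH_NAMES, MODEL_TENSOR, MODEL_TENSORS

arch = MODEL_ARCH.JINA_BERT_V2

# The string the converter writes as general.architecture.
assert MODEL_ARCH_NAMES[arch] == "jina-bert-v2"

# The renamed arch keeps its tensor layout, e.g. token-type embeddings.
assert MODEL_TENSOR.TOKEN_TYPES in MODEL_TENSORS[arch]

print("jina-bert-v2 is registered consistently in gguf-py")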

gguf-py/gguf/tensor_mapping.py

Lines changed: 6 additions & 6 deletions

@@ -238,7 +238,7 @@ class TensorNameMap:
             "model.layers.{bid}.feed_forward.w3",      # internlm2
             "encoder.layers.{bid}.mlp.fc11",           # nomic-bert
             "model.layers.{bid}.mlp.c_fc",             # starcoder2
-            "encoder.layer.{bid}.mlp.gated_layers_v",  # jina-bert
+            "encoder.layer.{bid}.mlp.gated_layers_v",  # jina-bert-v2
         ),

         MODEL_TENSOR.FFN_UP_EXP: (
@@ -265,7 +265,7 @@ class TensorNameMap:
             "model.layers.layers.{bid}.mlp.gate_proj", # plamo
             "model.layers.{bid}.feed_forward.w1",      # internlm2
             "encoder.layers.{bid}.mlp.fc12",           # nomic-bert
-            "encoder.layer.{bid}.mlp.gated_layers_w",  # jina-bert
+            "encoder.layer.{bid}.mlp.gated_layers_w",  # jina-bert-v2
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -299,7 +299,7 @@ class TensorNameMap:
             "model.layers.{bid}.feed_forward.w2",      # internlm2
             "encoder.layers.{bid}.mlp.fc2",            # nomic-bert
             "model.layers.{bid}.mlp.c_proj",           # starcoder2
-            "encoder.layer.{bid}.mlp.wo",              # jina-bert
+            "encoder.layer.{bid}.mlp.wo",              # jina-bert-v2
         ),

         MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -318,15 +318,15 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.q_layernorm",  # persimmon
             "model.layers.{bid}.self_attn.q_norm",       # cohere
             "transformer.blocks.{bid}.attn.q_ln",        # sea-lion
-            "encoder.layer.{bid}.attention.self.layer_norm_q" # jina-bert
+            "encoder.layer.{bid}.attention.self.layer_norm_q" # jina-bert-v2
         ),

         MODEL_TENSOR.ATTN_K_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm",  # persimmon
             "model.layers.{bid}.self_attn.k_norm",       # cohere
             "transformer.blocks.{bid}.attn.k_ln",        # sea-lion
-            "encoder.layer.{bid}.attention.self.layer_norm_k" # jina-bert
+            "encoder.layer.{bid}.attention.self.layer_norm_k" # jina-bert-v2
         ),

         MODEL_TENSOR.ROPE_FREQS: (
@@ -337,7 +337,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.output.LayerNorm",        # bert
             "encoder.layers.{bid}.norm2",                  # nomic-bert
             "transformer.decoder_layer.{bid}.rms_norm_3",  # Grok
-            "encoder.layer.{bid}.mlp.layernorm",           # jina-bert
+            "encoder.layer.{bid}.mlp.layernorm",           # jina-bert-v2
         ),

         MODEL_TENSOR.SSM_IN: (
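
These entries feed TensorNameMap, which translates source-checkpoint tensor names into architecture-independent GGUF tensor names; {bid} is a placeholder for the block index, and the trailing comments only record which architecture each candidate name comes from, so the edits in this file are purely cosmetic. A hedged usage sketch, assuming the get_tensor_name_map helper and TensorNameMap.get_name keep their current signatures:

# Hedged sketch: map a jina-bert-v2 checkpoint tensor name to its GGUF name.
# Assumes gguf-py from this tree is on the Python path.
from gguf.constants import MODEL_ARCH
from gguf.tensor_mapping import get_tensor_name_map

n_blocks = 12  # illustrative block count, not read from a real config
name_map = get_tensor_name_map(MODEL_ARCH.JINA_BERT_V2, n_blocks)

# "{bid}" is expanded per block, so block 3's gated up-projection weight
# resolves to the generic GGUF tensor name for FFN_UP.
gguf_name = name_map.get_name("encoder.layer.3.mlp.gated_layers_v.weight",
                              try_suffixes=(".weight", ".bias"))
print(gguf_name)  # expected to be something like "blk.3.ffn_up.weight"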

llama.cpp

Lines changed: 11 additions & 11 deletions

@@ -205,7 +205,7 @@ enum llm_arch {
     LLM_ARCH_REFACT,
     LLM_ARCH_BERT,
     LLM_ARCH_NOMIC_BERT,
-    LLM_ARCH_JINA_BERT,
+    LLM_ARCH_JINA_BERT_V2,
     LLM_ARCH_BLOOM,
     LLM_ARCH_STABLELM,
     LLM_ARCH_QWEN,
@@ -241,7 +241,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_REFACT, "refact" },
     { LLM_ARCH_BERT, "bert" },
     { LLM_ARCH_NOMIC_BERT, "nomic-bert" },
-    { LLM_ARCH_JINA_BERT, "jina-bert" },
+    { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2"},
     { LLM_ARCH_BLOOM, "bloom" },
     { LLM_ARCH_STABLELM, "stablelm" },
     { LLM_ARCH_QWEN, "qwen" },
@@ -690,7 +690,7 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES = {
         },
     },
     {
-        LLM_ARCH_JINA_BERT,
+        LLM_ARCH_JINA_BERT_V2,
         {
             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
@@ -3893,7 +3893,7 @@ static void llm_load_hparams(
                         model.type = e_model::MODEL_335M; break; // bge-large
                 }
             } break;
-        case LLM_ARCH_JINA_BERT:
+        case LLM_ARCH_JINA_BERT_V2:
            {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
@@ -4137,7 +4137,7 @@ static void llm_load_hparams(
 
     model.ftype = ml.ftype;
 
-    if (hparams.f_max_alibi_bias > 0.0f && model.arch != LLM_ARCH_JINA_BERT) {
+    if (hparams.f_max_alibi_bias > 0.0f && model.arch != LLM_ARCH_JINA_BERT_V2) {
         hparams.need_kq_pos = true;
     }
 
@@ -5113,7 +5113,7 @@ static bool llm_load_tensors(
                     layer.layer_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_OUT_NORM, "bias", i), {n_embd});
                 }
             } break;
-        case LLM_ARCH_JINA_BERT:
+        case LLM_ARCH_JINA_BERT_V2:
            {
                 model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}); // word_embeddings
                 model.type_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_vocab_type}); // token_type_embeddings
@@ -7994,7 +7994,7 @@ struct llm_build_context {
         struct ggml_tensor * inpL;
         struct ggml_tensor * inp_pos = nullptr;
 
-        if (model.arch != LLM_ARCH_JINA_BERT) {
+        if (model.arch != LLM_ARCH_JINA_BERT_V2) {
             inp_pos = build_inp_pos();
         }
         struct ggml_tensor * inp_mean = build_inp_mean();
@@ -8027,7 +8027,7 @@ struct llm_build_context {
             struct ggml_tensor * Vcur;
 
             // self-attention
-            if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_JINA_BERT) {
+            if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_JINA_BERT_V2) {
                 Qcur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].wq, cur), model.layers[il].bq);
                 cb(Qcur, "Qcur", il);
 
@@ -8137,7 +8137,7 @@ struct llm_build_context {
                         model.layers[il].ffn_down, model.layers[il].ffn_down_b,
                         NULL,
                         LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
-            } else if (model.arch == LLM_ARCH_JINA_BERT) {
+            } else if (model.arch == LLM_ARCH_JINA_BERT_V2) {
                 cur = llm_build_ffn(ctx0, cur,
                         model.layers[il].ffn_up, NULL,
                         model.layers[il].ffn_gate, NULL,
@@ -10544,7 +10544,7 @@ static struct ggml_cgraph * llama_build_graph(
                 result = llm.build_refact();
             } break;
         case LLM_ARCH_BERT:
-        case LLM_ARCH_JINA_BERT:
+        case LLM_ARCH_JINA_BERT_V2:
         case LLM_ARCH_NOMIC_BERT:
             {
                 result = llm.build_bert();
@@ -15473,7 +15473,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
         case LLM_ARCH_REFACT:
         case LLM_ARCH_BLOOM:
         case LLM_ARCH_MAMBA:
-        case LLM_ARCH_JINA_BERT:
+        case LLM_ARCH_JINA_BERT_V2:
            return LLAMA_ROPE_TYPE_NONE;
 
         // use what we call a normal RoPE, operating on pairs of consecutive head values
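
On the llama.cpp side, the general.architecture string read from the GGUF file is matched against LLM_ARCH_NAMES to recover the llm_arch value that drives hyperparameter loading, tensor creation, and graph building, so the converter and the loader must agree on "jina-bert-v2". A file converted before this rename still carries "jina-bert" and would presumably no longer resolve to a known architecture. A small Python sketch that mirrors the name-to-arch lookup (an illustration of the mechanism, not llama.cpp's code):

# Hedged sketch mirroring llama.cpp's name -> llm_arch lookup in Python.
# Enum members and strings follow the diff above; the lookup is illustrative.
from enum import IntEnum, auto

class LlmArch(IntEnum):
    BERT = auto()
    NOMIC_BERT = auto()
    JINA_BERT_V2 = auto()
    UNKNOWN = auto()

ARCH_NAMES = {
    LlmArch.BERT: "bert",
    LlmArch.NOMIC_BERT: "nomic-bert",
    LlmArch.JINA_BERT_V2: "jina-bert-v2",
}

def arch_from_string(name: str) -> LlmArch:
    # llama.cpp performs the equivalent scan over LLM_ARCH_NAMES.
    for arch, arch_name in ARCH_NAMES.items():
        if arch_name == name:
            return arch
    return LlmArch.UNKNOWN

assert arch_from_string("jina-bert-v2") == LlmArch.JINA_BERT_V2
# Files converted before the rename still say "jina-bert" and no longer match.
assert arch_from_string("jina-bert") == LlmArch.UNKNOWN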

0 commit comments