@@ -205,7 +205,7 @@ enum llm_arch {
     LLM_ARCH_REFACT,
     LLM_ARCH_BERT,
     LLM_ARCH_NOMIC_BERT,
-    LLM_ARCH_JINA_BERT,
+    LLM_ARCH_JINA_BERT_V2,
     LLM_ARCH_BLOOM,
     LLM_ARCH_STABLELM,
     LLM_ARCH_QWEN,
@@ -241,7 +241,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_REFACT, "refact" },
     { LLM_ARCH_BERT, "bert" },
     { LLM_ARCH_NOMIC_BERT, "nomic-bert" },
-    { LLM_ARCH_JINA_BERT, "jina-bert" },
+    { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
     { LLM_ARCH_BLOOM, "bloom" },
     { LLM_ARCH_STABLELM, "stablelm" },
     { LLM_ARCH_QWEN, "qwen" },
@@ -690,7 +690,7 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
         },
     },
     {
-        LLM_ARCH_JINA_BERT,
+        LLM_ARCH_JINA_BERT_V2,
         {
             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
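The strings in this table are base names; the tn() helper seen in the loader hunks further down turns them into full GGUF tensor names by appending a suffix and, for per-layer entries, filling in a block index. A stand-in sketch of that name formatting (the helper name, buffer size, and example patterns here are illustrative, not a quote of llama.cpp's tn()):

// Sketch only: build a full GGUF tensor name such as "token_embd.weight" or
// "blk.3.ffn_up.weight" from a base pattern, an optional block index, and a suffix.
#include <cstdio>
#include <string>

static std::string tensor_name(const char * base_fmt, const char * suffix, int block = -1) {
    char base[128];
    // the block argument is simply ignored when base_fmt contains no %d placeholder
    std::snprintf(base, sizeof(base), base_fmt, block);
    return std::string(base) + "." + suffix;
}

int main() {
    std::printf("%s\n", tensor_name("token_embd", "weight").c_str());       // token_embd.weight
    std::printf("%s\n", tensor_name("blk.%d.ffn_up", "weight", 3).c_str()); // blk.3.ffn_up.weight
}
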
@@ -3893,7 +3893,7 @@ static void llm_load_hparams(
                         model.type = e_model::MODEL_335M; break; // bge-large
                 }
             } break;
-        case LLM_ARCH_JINA_BERT:
+        case LLM_ARCH_JINA_BERT_V2:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
@@ -4137,7 +4137,7 @@ static void llm_load_hparams(
 
     model.ftype = ml.ftype;
 
-    if (hparams.f_max_alibi_bias > 0.0f && model.arch != LLM_ARCH_JINA_BERT) {
+    if (hparams.f_max_alibi_bias > 0.0f && model.arch != LLM_ARCH_JINA_BERT_V2) {
         hparams.need_kq_pos = true;
     }
 
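
f_max_alibi_bias signals ALiBi-style attention, where each head adds a distance-proportional penalty to its attention logits instead of relying on positional embeddings; this hunk simply keeps jina-bert-v2 from requesting the KQ-position buffer that the other ALiBi architectures enable here. As a standalone illustration of the bias itself (the slope schedule below is the ALiBi paper's power-of-two-head-count case, not llama.cpp's kernel):

// Sketch only: standard ALiBi slopes and bias. Head h of H gets slope
// m_h = 2^(-8*(h+1)/H), and the logit for query position i and key position j
// is penalized by m_h * (i - j), i.e. 0 on the diagonal and more negative
// the further back the key lies.
#include <cmath>
#include <cstdio>
#include <vector>

static std::vector<float> alibi_slopes(int n_head) {
    std::vector<float> m(n_head);
    for (int h = 0; h < n_head; ++h) {
        m[h] = std::pow(2.0f, -8.0f * float(h + 1) / float(n_head));
    }
    return m;
}

static float alibi_bias(float slope, int i_query, int j_key) {
    return -slope * float(i_query - j_key);
}

int main() {
    const auto m = alibi_slopes(8);
    std::printf("head 0 slope %.4f, bias at distance 4: %.4f\n", m[0], alibi_bias(m[0], 10, 6));
}
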
@@ -5113,7 +5113,7 @@ static bool llm_load_tensors(
                         layer.layer_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_OUT_NORM, "bias", i), {n_embd});
                     }
                 } break;
-            case LLM_ARCH_JINA_BERT:
+            case LLM_ARCH_JINA_BERT_V2:
                 {
                     model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}); // word_embeddings
                     model.type_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_vocab_type}); // token_type_embeddings
@@ -7994,7 +7994,7 @@ struct llm_build_context {
         struct ggml_tensor * inpL;
         struct ggml_tensor * inp_pos = nullptr;
 
-        if (model.arch != LLM_ARCH_JINA_BERT) {
+        if (model.arch != LLM_ARCH_JINA_BERT_V2) {
             inp_pos = build_inp_pos();
         }
         struct ggml_tensor * inp_mean = build_inp_mean();
@@ -8027,7 +8027,7 @@ struct llm_build_context {
             struct ggml_tensor * Vcur;
 
             // self-attention
-            if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_JINA_BERT) {
+            if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_JINA_BERT_V2) {
                 Qcur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].wq, cur), model.layers[il].bq);
                 cb(Qcur, "Qcur", il);
 
@@ -8137,7 +8137,7 @@ struct llm_build_context {
                         model.layers[il].ffn_down, model.layers[il].ffn_down_b,
                         NULL,
                         LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
-            } else if (model.arch == LLM_ARCH_JINA_BERT) {
+            } else if (model.arch == LLM_ARCH_JINA_BERT_V2) {
                 cur = llm_build_ffn(ctx0, cur,
                         model.layers[il].ffn_up, NULL,
                         model.layers[il].ffn_gate, NULL,
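
The BERT branch above is a plain sequential GELU feed-forward (up projection, GELU, down projection), while the jina-bert-v2 branch additionally wires in ffn_gate. The extra gate tensor is what enables a gated feed-forward, where an activated gate projection is multiplied elementwise with the up projection before the down projection; the activation and combination flags fall outside this hunk, so the sketch below is an illustrative gated-GELU formulation under those assumptions, not a quote of llm_build_ffn.

// Sketch only: sequential GELU FFN vs. a gated GELU FFN on single-token vectors,
// ignoring biases. Toy dense types; dimensions must agree between caller and weights.
#include <cmath>
#include <vector>

using vec = std::vector<float>;
using mat = std::vector<vec>; // row-major: mat[row][col]

static float gelu(float x) {
    // tanh approximation of GELU
    return 0.5f * x * (1.0f + std::tanh(0.79788456f * (x + 0.044715f * x * x * x)));
}

static vec matvec(const mat & W, const vec & x) {
    vec y(W.size(), 0.0f);
    for (size_t r = 0; r < W.size(); ++r) {
        for (size_t c = 0; c < x.size(); ++c) {
            y[r] += W[r][c] * x[c];
        }
    }
    return y;
}

// BERT-style: down( gelu( up(x) ) )
static vec ffn_seq_gelu(const mat & up, const mat & down, const vec & x) {
    vec h = matvec(up, x);
    for (float & v : h) v = gelu(v);
    return matvec(down, h);
}

// Gated: down( gelu( gate(x) ) * up(x) )
static vec ffn_gated_gelu(const mat & up, const mat & gate, const mat & down, const vec & x) {
    vec u = matvec(up, x);
    vec g = matvec(gate, x);
    for (size_t i = 0; i < g.size(); ++i) g[i] = gelu(g[i]) * u[i];
    return matvec(down, g);
}
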
@@ -10544,7 +10544,7 @@ static struct ggml_cgraph * llama_build_graph(
                 result = llm.build_refact();
             } break;
         case LLM_ARCH_BERT:
-        case LLM_ARCH_JINA_BERT:
+        case LLM_ARCH_JINA_BERT_V2:
         case LLM_ARCH_NOMIC_BERT:
             {
                 result = llm.build_bert();
@@ -15473,7 +15473,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
         case LLM_ARCH_REFACT:
         case LLM_ARCH_BLOOM:
         case LLM_ARCH_MAMBA:
-        case LLM_ARCH_JINA_BERT:
+        case LLM_ARCH_JINA_BERT_V2:
             return LLAMA_ROPE_TYPE_NONE;
 
         // use what we call a normal RoPE, operating on pairs of consecutive head values
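
On the public API side, the effect of this last hunk is that llama_rope_type() reports LLAMA_ROPE_TYPE_NONE for a loaded jina-bert-v2 model, since the architecture is listed with the non-RoPE cases. A hedged usage sketch follows; the model filename is a placeholder, and the call names follow llama.h around the time of this change and may differ in other versions.

// Hedged usage sketch: query the reported RoPE type of a loaded model.
#include <cstdio>
#include "llama.h"

int main() {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    // placeholder path, not a file shipped with the repository
    llama_model * model = llama_load_model_from_file("jina-embeddings-v2-base-en.gguf", mparams);
    if (model == nullptr) {
        std::fprintf(stderr, "failed to load model\n");
        return 1;
    }

    if (llama_rope_type(model) == LLAMA_ROPE_TYPE_NONE) {
        std::printf("model does not use RoPE (e.g. the ALiBi-based jina-bert-v2)\n");
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}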