
Commit cc0ac09

Author: Joan Martinez (committed)
feat: add changes to handle jina v2 base code
1 parent 2b737ca commit cc0ac09

File tree: 5 files changed (+50, -7 lines)


convert-hf-to-gguf-update.py

Lines changed: 1 addition & 0 deletions
@@ -82,6 +82,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "jina-v2-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
     {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
     {"name": "smaug-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct", },
+    {"name": "jina-v2-code", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-code", },
 ]
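This entry registers the jina-embeddings-v2-base-code repository so the update script can fetch its tokenizer and record a pre-tokenizer hash for it. A minimal sketch of that step, assuming the hash is the SHA-256 of the stringified token IDs produced for the script's fixed sample text (the real chktxt lives in convert-hf-to-gguf-update.py and is not reproduced here):

    from hashlib import sha256
    from transformers import AutoTokenizer

    # placeholder: the fixed sample text is defined in convert-hf-to-gguf-update.py
    chktxt = "..."

    tokenizer = AutoTokenizer.from_pretrained("jinaai/jina-embeddings-v2-base-code")
    chktok = tokenizer.encode(chktxt)
    chkhsh = sha256(str(chktok).encode()).hexdigest()
    # with the script's real chktxt this is expected to match the
    # "7967bfa498ade6..." hash checked below in convert-hf-to-gguf.py
    print(chkhsh)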

convert-hf-to-gguf.py

Lines changed: 6 additions & 7 deletions
@@ -422,9 +422,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         # NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script
         # or pull the latest version of the model from Huggingface
         # don't edit the hashes manually!
-        if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
-            # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
-            res = "llama-bpe"
         if chkhsh == "049ecf7629871e3041641907f3de7c733e4dbfdc736f57d882ba0b0845599754":
             # ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-base
             res = "deepseek-llm"
@@ -461,9 +458,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
             # ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf
             res = "olmo"
-        if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e":
-            # ref: https://huggingface.co/databricks/dbrx-base
-            res = "dbrx"
         if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
             # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-en
             res = "jina-v2-en"
@@ -476,6 +470,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "c136ed14d01c2745d4f60a9596ae66800e2b61fa45643e72436041855ad4089d":
             # ref: https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct
             res = "smaug-bpe"
+        if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a":
+            # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
+            res = "jina-v2-code"
 
         if res is None:
             logger.warning("\n")
@@ -2442,11 +2439,13 @@ def __init__(self, *args, **kwargs):
 
     def get_tensors(self):
         for name, data in super().get_tensors():
-            if 'gated_layers' in name:
+            if 'gated_layer' in name:
                 d1 = data[:self.intermediate_size, :]
                 name1 = name.replace('gated_layers', 'gated_layers_w')
+                name1 = name1.replace('up_gated_layer', 'gated_layers_w')
                 d2 = data[self.intermediate_size:, :]
                 name2 = name.replace('gated_layers', 'gated_layers_v')
+                name2 = name2.replace('up_gated_layer', 'gated_layers_v')
                 yield name1, d1
                 yield name2, d2
                 continue
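
The code model stores its fused FFN tensor under up_gated_layer instead of gated_layers, so the match is loosened to 'gated_layer' and both spellings are renamed before the tensor is split in half along the intermediate dimension. A small NumPy sketch of the split, with made-up sizes (only the slicing mirrors the converter):

    import numpy as np

    intermediate_size = 16  # illustrative value, not taken from the model config
    hidden_size = 8         # illustrative value

    # jina-bert-v2 checkpoints keep the two FFN projections fused in one tensor
    # ("gated_layers" in the text models, "up_gated_layer" in the code model)
    fused = np.random.randn(2 * intermediate_size, hidden_size).astype(np.float32)

    w = fused[:intermediate_size, :]  # exported as ...gated_layers_w
    v = fused[intermediate_size:, :]  # exported as ...gated_layers_v
    assert w.shape == v.shape == (intermediate_size, hidden_size)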

gguf-py/gguf/constants.py

Lines changed: 6 additions & 0 deletions
@@ -186,6 +186,8 @@ class MODEL_TENSOR(IntEnum):
     ATTN_Q_NORM    = auto()
     ATTN_K_NORM    = auto()
     LAYER_OUT_NORM = auto()
+    LAYER_NORM_1   = auto()
+    LAYER_NORM_2   = auto()
     SSM_IN         = auto()
     SSM_CONV1D     = auto()
     SSM_X          = auto()
@@ -274,6 +276,8 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.FFN_DOWN_EXP:   "blk.{bid}.ffn_down_exps",
     MODEL_TENSOR.FFN_UP_EXP:     "blk.{bid}.ffn_up_exps",
     MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
+    MODEL_TENSOR.LAYER_NORM_1:   "blk.{bid}.layer_norm_1",
+    MODEL_TENSOR.LAYER_NORM_2:   "blk.{bid}.layer_norm_2",
     MODEL_TENSOR.SSM_IN:         "blk.{bid}.ssm_in",
     MODEL_TENSOR.SSM_CONV1D:     "blk.{bid}.ssm_conv1d",
     MODEL_TENSOR.SSM_X:          "blk.{bid}.ssm_x",
@@ -426,6 +430,8 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.LAYER_OUT_NORM,
+        MODEL_TENSOR.LAYER_NORM_1,
+        MODEL_TENSOR.LAYER_NORM_2,
     ],
     MODEL_ARCH.MPT: [
         MODEL_TENSOR.TOKEN_EMBD,

gguf-py/gguf/tensor_mapping.py

Lines changed: 13 additions & 0 deletions
@@ -311,6 +311,7 @@ class TensorNameMap:
         "model.layers.{bid}.mlp.c_proj",       # starcoder2
         "encoder.layer.{bid}.mlp.wo",          # jina-bert-v2
         "model.layers.{bid}.residual_mlp.w2",  # arctic
+        "encoder.layer.{bid}.mlp.down_layer",  # jina-bert-v2
     ),
 
     MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -350,6 +351,18 @@ class TensorNameMap:
         "encoder.layers.{bid}.norm2",                  # nomic-bert
         "transformer.decoder_layer.{bid}.rms_norm_3",  # Grok
         "encoder.layer.{bid}.mlp.layernorm",           # jina-bert-v2
+        "encoder.layer.{bid}.layer_norm_1",            # jina-v2-code
+        "encoder.layer.{bid}.layer_norm_2"             # jina-v2-code
+    ),
+
+    MODEL_TENSOR.LAYER_NORM_1: (
+        "encoder.layer.{bid}.layer_norm_1",            # jina-v2-code
+    ),
+
+    MODEL_TENSOR.LAYER_NORM_2: (
+        "encoder.layer.{bid}.layer_norm_2",            # jina-v2-code
     ),
 
     MODEL_TENSOR.SSM_IN: (
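
The new mappings let the converter translate the per-layer checkpoint names into GGUF block-tensor names by filling in {bid}. A self-contained illustration of that idea; map_name and its arguments are hypothetical helpers for this sketch, not the gguf-py TensorNameMap API:

    # hypothetical helper, not the gguf-py TensorNameMap API
    templates = {
        "encoder.layer.{bid}.layer_norm_1": "blk.{bid}.layer_norm_1",  # jina-v2-code
        "encoder.layer.{bid}.layer_norm_2": "blk.{bid}.layer_norm_2",  # jina-v2-code
    }

    def map_name(hf_name, n_blocks, suffixes=(".weight", ".bias")):
        # try every block index and suffix until a template matches
        for bid in range(n_blocks):
            for src, dst in templates.items():
                for suffix in suffixes:
                    if hf_name == src.format(bid=bid) + suffix:
                        return dst.format(bid=bid) + suffix
        return None

    print(map_name("encoder.layer.7.layer_norm_1.weight", n_blocks=12))
    # -> blk.7.layer_norm_1.weight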

llama.cpp

Lines changed: 24 additions & 0 deletions
@@ -496,6 +496,8 @@ enum llm_tensor {
     LLM_TENSOR_ATTN_KV_B,
     LLM_TENSOR_ATTN_Q_A_NORM,
     LLM_TENSOR_ATTN_KV_A_NORM,
+    LLM_TENSOR_LAYER_NORM_1,
+    LLM_TENSOR_LAYER_NORM_2,
 };
 
 static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES = {
@@ -717,6 +719,8 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES = {
         { LLM_TENSOR_FFN_DOWN,     "blk.%d.ffn_down" },
         { LLM_TENSOR_FFN_GATE,     "blk.%d.ffn_gate" },
         { LLM_TENSOR_FFN_UP,       "blk.%d.ffn_up" },
+        { LLM_TENSOR_LAYER_NORM_1, "blk.%d.layer_norm_1" },
+        { LLM_TENSOR_LAYER_NORM_2, "blk.%d.layer_norm_2" },
     },
 },
 {
@@ -2010,6 +2014,12 @@ struct llama_layer {
     struct ggml_tensor * layer_out_norm_b;
     struct ggml_tensor * ffn_norm_exps;
 
+    // extra normalization layers needed by `jina-embeddings-v2-base-code`
+    struct ggml_tensor * layer_norm_1;
+    struct ggml_tensor * layer_norm_1_b;
+    struct ggml_tensor * layer_norm_2;
+    struct ggml_tensor * layer_norm_2_b;
+
     // ff
     struct ggml_tensor * ffn_gate; // w1
     struct ggml_tensor * ffn_down; // w2
@@ -5537,6 +5547,12 @@ static bool llm_load_tensors(
         layer.attn_out_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "weight", i), {n_embd}); //output_norm
         layer.attn_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "bias",   i), {n_embd});
 
+        layer.layer_norm_1   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_NORM_1, "weight", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+        layer.layer_norm_1_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_NORM_1, "bias",   i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+
+        layer.layer_norm_2   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_NORM_2, "weight", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+        layer.layer_norm_2_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_NORM_2, "bias",   i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+
         layer.ffn_up   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, n_ff});
         layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
@@ -8500,6 +8516,14 @@ struct llm_build_context {
         // attention layer norm
         cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].attn_out_norm, model.layers[il].attn_out_norm_b, LLM_NORM, cb, il);
 
+        if (model.layers[il].layer_norm_1 != nullptr) {
+            cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].layer_norm_1, model.layers[il].layer_norm_1_b, LLM_NORM, cb, il);
+        }
+
+        if (model.layers[il].layer_norm_2 != nullptr) {
+            cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].layer_norm_2, model.layers[il].layer_norm_2_b, LLM_NORM, cb, il);
+        }
+
         struct ggml_tensor * ffn_inp = cur;
         cb(ffn_inp, "ffn_inp", il);
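
Taken together, the graph change applies the two extra norms right after the attention output norm, and only when the optional tensors were loaded. A rough NumPy sketch of that ordering (toy shapes and a plain layer norm; ggml/llm_build_norm is not used here):

    import numpy as np

    def layer_norm(x, w, b, eps=1e-5):
        mu  = x.mean(-1, keepdims=True)
        var = x.var(-1, keepdims=True)
        return (x - mu) / np.sqrt(var + eps) * w + b

    n_embd = 8                                # toy size
    cur = np.random.randn(4, n_embd)          # hidden states after attention + residual

    attn_out_norm = (np.ones(n_embd), np.zeros(n_embd))
    layer_norm_1  = (np.ones(n_embd), np.zeros(n_embd))  # only present in the code model
    layer_norm_2  = (np.ones(n_embd), np.zeros(n_embd))  # only present in the code model

    cur = layer_norm(cur, *attn_out_norm)     # attention output norm (all jina-bert-v2)
    if layer_norm_1 is not None:              # optional, mirrors TENSOR_NOT_REQUIRED
        cur = layer_norm(cur, *layer_norm_1)
    if layer_norm_2 is not None:
        cur = layer_norm(cur, *layer_norm_2)

    ffn_inp = cur                             # input to the gated FFN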
