
Commit c2f407e

cleanup convert-hf-to-gguf.py
1 parent b8ff85e commit c2f407e

File tree

1 file changed: +47, -51 lines


convert-hf-to-gguf.py

Lines changed: 47 additions & 51 deletions
@@ -10,7 +10,7 @@
 import sys
 from enum import IntEnum
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, ContextManager, Iterator, cast
+from typing import TYPE_CHECKING, Any, ContextManager, Iterator, Sequence, cast
 
 import numpy as np
 import torch
@@ -25,15 +25,6 @@
 from convert import HfVocab
 
 
-# check for any of the given keys in the dictionary and return the value of the first key found
-def get_key_opts(d, keys):
-    for k in keys:
-        if k in d:
-            return d[k]
-    print(f"Could not find any of {keys}")
-    sys.exit()
-
-
 ###### MODEL DEFINITIONS ######
 
 class SentencePieceTokenTypes(IntEnum):
@@ -58,6 +49,15 @@ def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_big_endian:
         self.hparams = Model.load_hparams(self.dir_model)
         self.model_arch = self._get_model_architecture()
         self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=False)
+        self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"])
+
+    def find_hparam(self, keys: Sequence[str], optional: bool = False) -> Any:
+        key = next((k for k in keys if k in self.hparams), None)
+        if key is not None:
+            return self.hparams[key]
+        if optional:
+            return None
+        raise KeyError(f"could not find any of: {keys}")
 
     def set_vocab(self):
         self._set_vocab_gpt2()
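
The new Model.find_hparam method above replaces the module-level get_key_opts helper and raises a KeyError instead of calling sys.exit(). A minimal standalone sketch of the same lookup logic, written as a free function against a hypothetical hparams dict (values are assumptions, not taken from the commit):

# Sketch of the find_hparam lookup, extracted as a free function for illustration only.
from typing import Any, Sequence

def find_hparam(hparams: dict, keys: Sequence[str], optional: bool = False) -> Any:
    # return the value of the first key that is present; None if optional, else raise
    key = next((k for k in keys if k in hparams), None)
    if key is not None:
        return hparams[key]
    if optional:
        return None
    raise KeyError(f"could not find any of: {keys}")

hparams = {"n_layer": 12, "hidden_size": 768}  # hypothetical HF config subset
print(find_hparam(hparams, ["n_layers", "num_hidden_layers", "n_layer"]))  # 12
print(find_hparam(hparams, ["num_key_value_heads"], optional=True))        # None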
@@ -79,28 +79,33 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
 
     def set_gguf_parameters(self):
         self.gguf_writer.add_name(self.dir_model.name)
-        self.gguf_writer.add_block_count(self.hparams.get(
-            "n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")),
-        ))
+        self.gguf_writer.add_block_count(self.block_count)
+
         if (n_ctx := self.hparams.get("max_position_embeddings")) is not None:
             self.gguf_writer.add_context_length(n_ctx)
-        if (n_embd := self.hparams.get("hidden_size")) is not None:
-            self.gguf_writer.add_embedding_length(n_embd)
-        if (n_ff := self.hparams.get("intermediate_size")) is not None:
+
+        n_embd = self.find_hparam(["hidden_size", "n_embd"])
+        self.gguf_writer.add_embedding_length(n_embd)
+
+        if (n_ff := self.find_hparam(["intermediate_size", "n_inner"])) is not None:
             self.gguf_writer.add_feed_forward_length(n_ff)
-        if (n_head := self.hparams.get("num_attention_heads")) is not None:
-            self.gguf_writer.add_head_count(n_head)
+
+        n_head = self.find_hparam(["num_attention_heads", "n_head"])
+        self.gguf_writer.add_head_count(n_head)
+
         if (n_head_kv := self.hparams.get("num_key_value_heads")) is not None:
             self.gguf_writer.add_head_count_kv(n_head_kv)
 
-        if (n_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
-            self.gguf_writer.add_layer_norm_rms_eps(n_rms_eps)
+        if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
+            self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps)
+        if (f_norm_eps := self.hparams.get("layer_norm_eps")) is not None:
+            self.gguf_writer.add_layer_norm_eps(f_norm_eps)
         if (n_experts := self.hparams.get("num_local_experts")) is not None:
             self.gguf_writer.add_expert_count(n_experts)
         if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
             self.gguf_writer.add_expert_used_count(n_experts_used)
 
-        self.gguf_writer.add_parallel_residual(self.hparams.get("use_parallel_residual", True))
+        self.gguf_writer.add_file_type(self.ftype)
 
     def write_tensors(self):
         block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
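
With the lookup centralized, the base set_gguf_parameters treats embedding width and head count as required (either HF spelling is accepted), while keys such as num_key_value_heads remain optional. A rough sketch of that required-versus-optional split, using an assumed GPT-2-style config and a plain dict standing in for gguf.GGUFWriter:

# Stand-in illustration; kv collects what would be written to GGUF.
hparams = {"n_embd": 768, "n_head": 12, "n_layer": 12}  # assumed GPT-2-style config
kv = {}

# required: take the first spelling present (the real code raises if neither exists)
kv["embedding_length"] = next(hparams[k] for k in ("hidden_size", "n_embd") if k in hparams)
kv["head_count"] = next(hparams[k] for k in ("num_attention_heads", "n_head") if k in hparams)

# optional: simply skipped when the config does not define it (no GQA here)
if (n_head_kv := hparams.get("num_key_value_heads")) is not None:
    kv["head_count_kv"] = n_head_kv

print(kv)  # {'embedding_length': 768, 'head_count': 12}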
@@ -1301,21 +1306,21 @@ def write_tensors(self):
 
 class Phi2Model(Model):
     def set_gguf_parameters(self):
-        block_count = get_key_opts(self.hparams, ["num_hidden_layers", "n_layer"])
+        block_count = self.find_hparam(["num_hidden_layers", "n_layer"])
 
-        rot_pct = get_key_opts(self.hparams, ["partial_rotary_factor"])
-        n_embd = get_key_opts(self.hparams, ["hidden_size", "n_embd"])
-        n_head = get_key_opts(self.hparams, ["num_attention_heads", "n_head"])
+        rot_pct = self.find_hparam(["partial_rotary_factor"])
+        n_embd = self.find_hparam(["hidden_size", "n_embd"])
+        n_head = self.find_hparam(["num_attention_heads", "n_head"])
 
         self.gguf_writer.add_name("Phi2")
-        self.gguf_writer.add_context_length(get_key_opts(self.hparams, ["n_positions", "max_position_embeddings"]))
+        self.gguf_writer.add_context_length(self.find_hparam(["n_positions", "max_position_embeddings"]))
 
         self.gguf_writer.add_embedding_length(n_embd)
         self.gguf_writer.add_feed_forward_length(4 * n_embd)
         self.gguf_writer.add_block_count(block_count)
         self.gguf_writer.add_head_count(n_head)
         self.gguf_writer.add_head_count_kv(n_head)
-        self.gguf_writer.add_layer_norm_eps(get_key_opts(self.hparams, ["layer_norm_epsilon", "layer_norm_eps"]))
+        self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_epsilon", "layer_norm_eps"]))
         self.gguf_writer.add_rope_dimension_count(int(rot_pct * n_embd) // n_head)
         self.gguf_writer.add_file_type(self.ftype)
         self.gguf_writer.add_add_bos_token(False)
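
The Phi-2 rotary dimension count above is computed as int(rot_pct * n_embd) // n_head. A worked example with Phi-2-like values (hidden size 2560, 32 heads, partial_rotary_factor 0.4 are assumed here purely for illustration):

# Worked arithmetic for add_rope_dimension_count, with assumed Phi-2-like values.
rot_pct, n_embd, n_head = 0.4, 2560, 32

head_dim = n_embd // n_head                  # 80 dimensions per attention head
rope_dims = int(rot_pct * n_embd) // n_head  # int(1024.0) // 32 == 32

# equivalently: rotate 40% of each head's dimensions
assert rope_dims == int(rot_pct * head_dim) == 32
print(rope_dims)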
@@ -1640,21 +1645,12 @@ def write_tensors(self):
 class BertModel(Model):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.block_count = self.hparams["num_hidden_layers"]
         self.vocab_size = None
 
     def set_gguf_parameters(self):
-        # TODO(cebtenzzre): merge with parent class
-        self.gguf_writer.add_name(self.dir_model.name)
-        self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
-        self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
-        self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
-        self.gguf_writer.add_block_count(self.block_count)
-        self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
-        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"])
+        super().set_gguf_parameters()
         self.gguf_writer.add_causal_attention(False)
         self.gguf_writer.add_pooling_layer(True)
-        self.gguf_writer.add_file_type(self.ftype)
 
     def set_vocab(self):
         path = self.dir_model
@@ -1678,7 +1674,7 @@ def phantom(tok, typ):
             if tok.startswith(b"##"):
                 return tok[2:]
             return b"\xe2\x96\x81" + tok
-        tokens = [phantom(t, y) for t, y in zip(tokens, toktypes)]
+        tokens = tuple(phantom(t, y) for t, y in zip(tokens, toktypes))
 
         # set up bos and eos tokens (cls and sep)
         self.gguf_writer.add_bos_token_id(vocab.tokenizer.cls_token_id)
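
The phantom helper (only its tail appears in this hunk) rewrites WordPiece tokens into the SentencePiece-style form: continuation pieces lose their "##" prefix, and word-initial pieces gain a U+2581 marker. A simplified standalone sketch, with the token-type argument of the real helper omitted:

# Simplified sketch of the WordPiece -> SentencePiece-style mapping (typ parameter dropped).
def phantom(tok: bytes) -> bytes:
    if tok.startswith(b"##"):
        return tok[2:]                # continuation piece: drop the "##"
    return b"\xe2\x96\x81" + tok      # word-initial piece: prefix UTF-8 for U+2581 ("▁")

print(phantom(b"hello"))  # b'\xe2\x96\x81hello'
print(phantom(b"##ing"))  # b'ing'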
@@ -1732,34 +1728,34 @@ def write_tensors(self):
 
 class NomicBertModel(BertModel):
     def __init__(self, *args, **kwargs):
-        Model.__init__(self, *args, **kwargs)
-        self.block_count = self.hparams["n_layer"]
+        super().__init__(*args, **kwargs)
+
+        # the HF config claims n_ctx=8192, but it uses RoPE scaling
+        self.hparams["n_ctx"] = 2048
+
+        # SwigLU activation
         assert self.hparams["activation_function"] == "swiglu"
-        assert self.hparams["causal"] is False # True is untested
+        # this doesn't do anything in the HF version
+        assert self.hparams["causal"] is False
+        # no bias tensors
         assert self.hparams["qkv_proj_bias"] is False
         assert self.hparams["mlp_fc1_bias"] is False
         assert self.hparams["mlp_fc2_bias"] is False
+        # norm at end of layer
         assert self.hparams["prenorm"] is False
+        # standard RoPE
         assert self.hparams["rotary_emb_fraction"] == 1.0
         assert self.hparams["rotary_emb_interleaved"] is False
         assert self.hparams["rotary_emb_scale_base"] is None
 
     def set_gguf_parameters(self):
-        # TODO(cebtenzzre): merge with parent class
-        self.gguf_writer.add_name(self.dir_model.name)
-        # the HF config claims n_ctx=8192, but it uses RoPE scaling
-        self.gguf_writer.add_context_length(2048)
-        self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
-        self.gguf_writer.add_feed_forward_length(self.hparams["n_inner"])
-        self.gguf_writer.add_block_count(self.block_count)
-        self.gguf_writer.add_head_count(self.hparams["n_head"])
-        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+        super().set_gguf_parameters()
         self.gguf_writer.add_causal_attention(self.hparams["causal"])
         self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
         self.gguf_writer.add_pooling_layer(True)
-        self.gguf_writer.add_file_type(self.ftype)
 
     def get_tensors(self):
+        assert self.vocab_size is not None
        for name, data in super().get_tensors():
            # Nomic Embed's token embeddings tensor is padded, but llama.cpp wants tensor sizes to match exactly.
            if name == 'embeddings.word_embeddings.weight' and data.shape[1] != self.vocab_size:
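
The comment in this last hunk notes that Nomic Embed's token-embedding tensor is padded while llama.cpp expects exact sizes; the trimming itself falls outside the lines shown. A rough NumPy sketch of one way such padding could be stripped, where the vocab size and the pad-to-multiple-of-64 rule are assumptions rather than values read from the model:

# Illustrative only: sizes and the padding rule are assumptions, not from the commit.
import numpy as np

vocab_size = 30522                            # assumed true tokenizer vocab size
padded_rows = (vocab_size + 63) // 64 * 64    # 30528 under the assumed padding rule
weights = np.zeros((padded_rows, 768), dtype=np.float32)

trimmed = weights[:vocab_size, :]             # drop padding rows before writing to GGUF
print(weights.shape, "->", trimmed.shape)     # (30528, 768) -> (30522, 768)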
