import sys
from enum import IntEnum
from pathlib import Path
-from typing import TYPE_CHECKING, Any, ContextManager, Iterator, cast
+from typing import TYPE_CHECKING, Any, ContextManager, Iterator, Sequence, cast

import numpy as np
import torch
from convert import HfVocab


-# check for any of the given keys in the dictionary and return the value of the first key found
-def get_key_opts(d, keys):
-    for k in keys:
-        if k in d:
-            return d[k]
-    print(f"Could not find any of {keys}")
-    sys.exit()
-
-
###### MODEL DEFINITIONS ######

class SentencePieceTokenTypes(IntEnum):
@@ -58,6 +49,15 @@ def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_big_endian:
        self.hparams = Model.load_hparams(self.dir_model)
        self.model_arch = self._get_model_architecture()
        self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=False)
+        self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"])
+
+    def find_hparam(self, keys: Sequence[str], optional: bool = False) -> Any:
+        key = next((k for k in keys if k in self.hparams), None)
+        if key is not None:
+            return self.hparams[key]
+        if optional:
+            return None
+        raise KeyError(f"could not find any of: {keys}")

    def set_vocab(self):
        self._set_vocab_gpt2()
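For reference, a minimal standalone sketch of how the new `find_hparam` lookup behaves; the `hparams` dict below is made up for illustration:

```python
# Hypothetical hparams, standing in for the model's config.json contents.
hparams = {"num_hidden_layers": 32, "hidden_size": 4096}

def find_hparam(keys, optional=False):
    # return the value of the first key present in hparams
    key = next((k for k in keys if k in hparams), None)
    if key is not None:
        return hparams[key]
    if optional:
        return None
    raise KeyError(f"could not find any of: {keys}")

print(find_hparam(["n_layers", "num_hidden_layers", "n_layer"]))  # -> 32, first match wins
print(find_hparam(["n_inner"], optional=True))                    # -> None instead of failing
```

Unlike the removed `get_key_opts` helper, a missing required key now raises `KeyError` rather than printing a message and calling `sys.exit()`, so callers can catch and handle it.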
@@ -79,28 +79,33 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:

    def set_gguf_parameters(self):
        self.gguf_writer.add_name(self.dir_model.name)
-        self.gguf_writer.add_block_count(self.hparams.get(
-            "n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")),
-        ))
+        self.gguf_writer.add_block_count(self.block_count)
+
        if (n_ctx := self.hparams.get("max_position_embeddings")) is not None:
            self.gguf_writer.add_context_length(n_ctx)
-        if (n_embd := self.hparams.get("hidden_size")) is not None:
-            self.gguf_writer.add_embedding_length(n_embd)
-        if (n_ff := self.hparams.get("intermediate_size")) is not None:
+
+        n_embd = self.find_hparam(["hidden_size", "n_embd"])
+        self.gguf_writer.add_embedding_length(n_embd)
+
+        if (n_ff := self.find_hparam(["intermediate_size", "n_inner"])) is not None:
            self.gguf_writer.add_feed_forward_length(n_ff)
-        if (n_head := self.hparams.get("num_attention_heads")) is not None:
-            self.gguf_writer.add_head_count(n_head)
+
+        n_head = self.find_hparam(["num_attention_heads", "n_head"])
+        self.gguf_writer.add_head_count(n_head)
+
        if (n_head_kv := self.hparams.get("num_key_value_heads")) is not None:
            self.gguf_writer.add_head_count_kv(n_head_kv)

-        if (n_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
-            self.gguf_writer.add_layer_norm_rms_eps(n_rms_eps)
+        if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
+            self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps)
+        if (f_norm_eps := self.hparams.get("layer_norm_eps")) is not None:
+            self.gguf_writer.add_layer_norm_eps(f_norm_eps)
        if (n_experts := self.hparams.get("num_local_experts")) is not None:
            self.gguf_writer.add_expert_count(n_experts)
        if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
            self.gguf_writer.add_expert_used_count(n_experts_used)

-        self.gguf_writer.add_parallel_residual(self.hparams.get("use_parallel_residual", True))
+        self.gguf_writer.add_file_type(self.ftype)

    def write_tensors(self):
        block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
@@ -1301,21 +1306,21 @@ def write_tensors(self):

class Phi2Model(Model):
    def set_gguf_parameters(self):
-        block_count = get_key_opts(self.hparams, ["num_hidden_layers", "n_layer"])
+        block_count = self.find_hparam(["num_hidden_layers", "n_layer"])

-        rot_pct = get_key_opts(self.hparams, ["partial_rotary_factor"])
-        n_embd = get_key_opts(self.hparams, ["hidden_size", "n_embd"])
-        n_head = get_key_opts(self.hparams, ["num_attention_heads", "n_head"])
+        rot_pct = self.find_hparam(["partial_rotary_factor"])
+        n_embd = self.find_hparam(["hidden_size", "n_embd"])
+        n_head = self.find_hparam(["num_attention_heads", "n_head"])

        self.gguf_writer.add_name("Phi2")
-        self.gguf_writer.add_context_length(get_key_opts(self.hparams, ["n_positions", "max_position_embeddings"]))
+        self.gguf_writer.add_context_length(self.find_hparam(["n_positions", "max_position_embeddings"]))

        self.gguf_writer.add_embedding_length(n_embd)
        self.gguf_writer.add_feed_forward_length(4 * n_embd)
        self.gguf_writer.add_block_count(block_count)
        self.gguf_writer.add_head_count(n_head)
        self.gguf_writer.add_head_count_kv(n_head)
-        self.gguf_writer.add_layer_norm_eps(get_key_opts(self.hparams, ["layer_norm_epsilon", "layer_norm_eps"]))
+        self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_epsilon", "layer_norm_eps"]))
        self.gguf_writer.add_rope_dimension_count(int(rot_pct * n_embd) // n_head)
        self.gguf_writer.add_file_type(self.ftype)
        self.gguf_writer.add_add_bos_token(False)
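As a quick sanity check on the `add_rope_dimension_count` expression above, here is the arithmetic with assumed Phi-2 config values (illustrative only; the converter reads the real numbers from the model's config.json):

```python
# Assumed Phi-2 hyperparameters, for illustration only.
rot_pct = 0.4   # partial_rotary_factor
n_embd = 2560   # hidden_size
n_head = 32     # num_attention_heads

# Same expression as in Phi2Model.set_gguf_parameters:
rope_dims = int(rot_pct * n_embd) // n_head
print(rope_dims)  # int(1024.0) // 32 == 32 rotary dimensions per head
```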
@@ -1640,21 +1645,12 @@ def write_tensors(self):
class BertModel(Model):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
-        self.block_count = self.hparams["num_hidden_layers"]
        self.vocab_size = None

    def set_gguf_parameters(self):
-        # TODO(cebtenzzre): merge with parent class
-        self.gguf_writer.add_name(self.dir_model.name)
-        self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
-        self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
-        self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
-        self.gguf_writer.add_block_count(self.block_count)
-        self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
-        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"])
+        super().set_gguf_parameters()
        self.gguf_writer.add_causal_attention(False)
        self.gguf_writer.add_pooling_layer(True)
-        self.gguf_writer.add_file_type(self.ftype)

    def set_vocab(self):
        path = self.dir_model
@@ -1678,7 +1674,7 @@ def phantom(tok, typ):
            if tok.startswith(b"##"):
                return tok[2:]
            return b"\xe2\x96\x81" + tok
-        tokens = [phantom(t, y) for t, y in zip(tokens, toktypes)]
+        tokens = tuple(phantom(t, y) for t, y in zip(tokens, toktypes))

        # set up bos and eos tokens (cls and sep)
        self.gguf_writer.add_bos_token_id(vocab.tokenizer.cls_token_id)
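For context, a minimal sketch of the remapping that `phantom` applies to WordPiece token bytes, covering only the two branches visible in this hunk (the real function also receives the token type):

```python
# Illustrative only: WordPiece marks continuation pieces with a "##" prefix, while
# the convention written to GGUF here marks word-start pieces with U+2581 ("\xe2\x96\x81").
def phantom_sketch(tok: bytes) -> bytes:
    if tok.startswith(b"##"):
        return tok[2:]                # continuation piece: strip the "##" marker
    return b"\xe2\x96\x81" + tok      # word-start piece: prepend the U+2581 prefix

print(phantom_sketch(b"##ing"))  # b'ing'
print(phantom_sketch(b"hello"))  # b'\xe2\x96\x81hello'
```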
@@ -1732,34 +1728,34 @@ def write_tensors(self):

class NomicBertModel(BertModel):
    def __init__(self, *args, **kwargs):
-        Model.__init__(self, *args, **kwargs)
-        self.block_count = self.hparams["n_layer"]
+        super().__init__(*args, **kwargs)
+
+        # the HF config claims n_ctx=8192, but it uses RoPE scaling
+        self.hparams["n_ctx"] = 2048
+
+        # SwigLU activation
        assert self.hparams["activation_function"] == "swiglu"
-        assert self.hparams["causal"] is False  # True is untested
+        # this doesn't do anything in the HF version
+        assert self.hparams["causal"] is False
+        # no bias tensors
        assert self.hparams["qkv_proj_bias"] is False
        assert self.hparams["mlp_fc1_bias"] is False
        assert self.hparams["mlp_fc2_bias"] is False
+        # norm at end of layer
        assert self.hparams["prenorm"] is False
+        # standard RoPE
        assert self.hparams["rotary_emb_fraction"] == 1.0
        assert self.hparams["rotary_emb_interleaved"] is False
        assert self.hparams["rotary_emb_scale_base"] is None

    def set_gguf_parameters(self):
-        # TODO(cebtenzzre): merge with parent class
-        self.gguf_writer.add_name(self.dir_model.name)
-        # the HF config claims n_ctx=8192, but it uses RoPE scaling
-        self.gguf_writer.add_context_length(2048)
-        self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
-        self.gguf_writer.add_feed_forward_length(self.hparams["n_inner"])
-        self.gguf_writer.add_block_count(self.block_count)
-        self.gguf_writer.add_head_count(self.hparams["n_head"])
-        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+        super().set_gguf_parameters()
        self.gguf_writer.add_causal_attention(self.hparams["causal"])
        self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
        self.gguf_writer.add_pooling_layer(True)
-        self.gguf_writer.add_file_type(self.ftype)

    def get_tensors(self):
+        assert self.vocab_size is not None
        for name, data in super().get_tensors():
            # Nomic Embed's token embeddings tensor is padded, but llama.cpp wants tensor sizes to match exactly.
            if name == 'embeddings.word_embeddings.weight' and data.shape[1] != self.vocab_size:
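For illustration, a standalone sketch of the kind of trimming this check guards: dropping padding rows from a padded embedding matrix so the tensor matches the real vocab size. The sizes and the round-up-to-64 padding scheme below are assumptions, not read from the model:

```python
import numpy as np

vocab_size = 30522                           # hypothetical real vocabulary size
padded_rows = (vocab_size + 63) // 64 * 64   # assumed padding to a multiple of 64
emb = np.zeros((padded_rows, 768), dtype=np.float32)  # padded [rows, n_embd] embedding matrix

trimmed = emb[:vocab_size, :]                # keep only the rows for real vocab entries
print(emb.shape, "->", trimmed.shape)        # (30528, 768) -> (30522, 768)
```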