@@ -1019,8 +1019,12 @@ def check_vocab_size(params: Params, vocab: Vocab, pad_vocab: bool = False) -> None:
 
 
 class OutputFile:
-    def __init__(self, fname_out: Path, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
-        self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
+    def __init__(
+        self, fname_out: Path, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE
+    ) -> None:
+        self.gguf = gguf.GGUFWriter(
+            fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess
+        )
 
     def add_meta_arch(self, params: Params) -> None:
         name = "LLaMA"
@@ -1029,28 +1033,28 @@ def add_meta_arch(self, params: Params) -> None:
         if params.n_ctx == 4096:
             name = "LLaMA v2"
         elif params.path_model is not None:
-            name = str(params.path_model.parent).split('/')[-1]
+            name = str(params.path_model.parent).split("/")[-1]
 
-        self.gguf.add_name                (name)
-        self.gguf.add_context_length      (params.n_ctx)
-        self.gguf.add_embedding_length    (params.n_embd)
-        self.gguf.add_block_count         (params.n_layer)
-        self.gguf.add_feed_forward_length (params.n_ff)
+        self.gguf.add_name(name)
+        self.gguf.add_context_length(params.n_ctx)
+        self.gguf.add_embedding_length(params.n_embd)
+        self.gguf.add_block_count(params.n_layer)
+        self.gguf.add_feed_forward_length(params.n_ff)
         self.gguf.add_rope_dimension_count(params.n_embd // params.n_head)
-        self.gguf.add_head_count          (params.n_head)
-        self.gguf.add_head_count_kv       (params.n_head_kv)
+        self.gguf.add_head_count(params.n_head)
+        self.gguf.add_head_count_kv(params.n_head_kv)
+
+        if params.f_norm_eps is None:
+            raise ValueError("f_norm_eps is None")
+
+        self.gguf.add_layer_norm_rms_eps(params.f_norm_eps)
 
         if params.n_experts:
             self.gguf.add_expert_count(params.n_experts)
 
         if params.n_experts_used:
             self.gguf.add_expert_used_count(params.n_experts_used)
 
-        if params.f_norm_eps:
-            self.gguf.add_layer_norm_rms_eps(params.f_norm_eps)
-        else:
-            raise ValueError('f_norm_eps is None')
-
         if params.f_rope_freq_base is not None:
             self.gguf.add_rope_freq_base(params.f_rope_freq_base)
 
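A note on the epsilon change above: besides hoisting the check ahead of the expert-count handling, the rewrite replaces the truthiness test `if params.f_norm_eps:` with an explicit `if params.f_norm_eps is None:`, which matters for floats because `0.0` is falsy. A minimal sketch of the difference, using a hypothetical standalone variable rather than the real `Params` object:

```python
# Hypothetical value standing in for params.f_norm_eps.
f_norm_eps = 0.0  # unusual, but explicitly set

if not f_norm_eps:
    print("truthiness test rejects 0.0")  # fires: 0.0 is falsy

if f_norm_eps is None:
    print("identity test rejects 0.0")    # does not fire: only None fails
```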
@@ -1068,18 +1072,44 @@ def add_meta_arch(self, params: Params) -> None:
         if params.ftype is not None:
             self.gguf.add_file_type(params.ftype)
 
-    def add_meta_vocab(self, vocab: Vocab) -> None:
+    def handle_tokenizer_model(self, vocab: Vocab) -> str:
+        # Map the vocab types to the supported tokenizer models
+        tokenizer_model = {
+            SentencePieceVocab: "llama",
+            HfVocab: "llama",
+            BpeVocab: "gpt2",
+        }.get(type(vocab))
+
+        # Block if vocab type is not predefined
+        if tokenizer_model is None:
+            raise ValueError("Unknown vocab type: Not supported")
+
+        return tokenizer_model
+
+    def extract_vocabulary_from_model(self, vocab: Vocab) -> Tuple[list, list, list]:
         tokens = []
         scores = []
         toktypes = []
+
         # NOTE: `all_tokens` returns the base vocabulary and added tokens
         for text, score, toktype in vocab.all_tokens():
             tokens.append(text)
             scores.append(score)
             toktypes.append(toktype)
 
-        vocab_type = vocab.get_vocab_type()
-        self.gguf.add_tokenizer_model(vocab_type)
+        return tokens, scores, toktypes
+
+    def add_meta_vocab(self, vocab: Vocab) -> None:
+        # Handle the tokenizer model
+        tokenizer_model = self.handle_tokenizer_model(vocab)
+
+        # Ensure that tokenizer_model is added to the GGUF model
+        self.gguf.add_tokenizer_model(tokenizer_model)
+
+        # Extract model vocabulary for model conversion
+        tokens, scores, toktypes = self.extract_vocabulary_from_model(vocab)
+
+        # Add extracted token information for model conversion
         self.gguf.add_token_list(tokens)
         self.gguf.add_token_scores(scores)
         self.gguf.add_token_types(toktypes)
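The new `handle_tokenizer_model` dispatches on the exact class of the vocab via a dict keyed by type. A self-contained sketch of the same pattern, with hypothetical stand-in classes in place of the real ones from convert.py:

```python
# Stand-in vocab classes; the real SentencePieceVocab/HfVocab/BpeVocab
# in convert.py carry actual tokenizer state.
class SentencePieceVocab: ...
class HfVocab: ...
class BpeVocab: ...

def tokenizer_model_for(vocab: object) -> str:
    # Exact-type lookup: the instance's class is the dict key.
    model = {
        SentencePieceVocab: "llama",
        HfVocab: "llama",
        BpeVocab: "gpt2",
    }.get(type(vocab))
    if model is None:
        raise ValueError("Unknown vocab type: Not supported")
    return model

print(tokenizer_model_for(BpeVocab()))  # -> gpt2
```

One property of the `type(vocab)` lookup worth keeping in mind: it matches exact classes only, so an instance of a subclass of `BpeVocab` would fall through to the `ValueError`, where an `isinstance` chain would have accepted it.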
@@ -1089,10 +1119,14 @@ def add_meta_special_vocab(self, svocab: gguf.SpecialVocab) -> None:
 
     def add_tensor_info(self, name: str, tensor: LazyTensor) -> None:
         n_elements = int(np.prod(tensor.shape))
-        raw_dtype = getattr(tensor.data_type, 'ggml_type', None)
-        data_type = getattr(tensor.data_type, 'quantized_type', None) or tensor.data_type.dtype
+        raw_dtype = getattr(tensor.data_type, "ggml_type", None)
+        data_type = (
+            getattr(tensor.data_type, "quantized_type", None) or tensor.data_type.dtype
+        )
         data_nbytes = tensor.data_type.elements_to_bytes(n_elements)
-        self.gguf.add_tensor_info(name, tensor.shape, data_type, data_nbytes, raw_dtype=raw_dtype)
+        self.gguf.add_tensor_info(
+            name, tensor.shape, data_type, data_nbytes, raw_dtype=raw_dtype
+        )
 
     def write_meta(self) -> None:
         self.gguf.write_header_to_file()
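The `raw_dtype`/`data_type` lines reflowed above lean on the `getattr(obj, name, default)` idiom: quantized data types carry extra attributes that plain ones lack. A rough sketch with hypothetical stand-in classes (the real `DataType` hierarchy in convert.py is richer):

```python
import numpy as np

class PlainType:
    dtype = np.dtype(np.float32)           # every data type has a dtype

class QuantizedType(PlainType):
    quantized_type = np.dtype(np.float16)  # only quantized types have these
    ggml_type = 8                          # hypothetical ggml enum value

for t in (PlainType(), QuantizedType()):
    raw_dtype = getattr(t, "ggml_type", None)                  # None when absent
    data_type = getattr(t, "quantized_type", None) or t.dtype  # falls back to dtype
    print(type(t).__name__, raw_dtype, data_type)
```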
@@ -1106,11 +1140,14 @@ def close(self) -> None:
 
     @staticmethod
     def write_vocab_only(
-        fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab,
+        fname_out: Path,
+        params: Params,
+        vocab: Vocab,
+        svocab: gguf.SpecialVocab,
         endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
         pad_vocab: bool = False,
     ) -> None:
-        check_vocab_size(params, vocab, pad_vocab = pad_vocab)
+        check_vocab_size(params, vocab, pad_vocab=pad_vocab)
 
         of = OutputFile(fname_out, endianess=endianess)
 
@@ -1138,12 +1175,17 @@ def maybe_do_quantize(item: tuple[DataType, NDArray]) -> NDArray:
 
     @staticmethod
     def write_all(
-        fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab,
+        fname_out: Path,
+        ftype: GGMLFileType,
+        params: Params,
+        model: LazyModel,
+        vocab: Vocab,
+        svocab: gguf.SpecialVocab,
         concurrency: int = DEFAULT_CONCURRENCY,
         endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
         pad_vocab: bool = False,
     ) -> None:
-        check_vocab_size(params, vocab, pad_vocab = pad_vocab)
+        check_vocab_size(params, vocab, pad_vocab=pad_vocab)
 
         of = OutputFile(fname_out, endianess=endianess)
 
@@ -1160,18 +1202,30 @@ def write_all(
         of.write_tensor_info()
 
         # tensor data
-        ndarrays_inner = bounded_parallel_map(OutputFile.do_item, model.items(), concurrency=concurrency)
+        ndarrays_inner = bounded_parallel_map(
+            OutputFile.do_item, model.items(), concurrency=concurrency
+        )
         if ftype == GGMLFileType.MostlyQ8_0:
-            ndarrays = bounded_parallel_map(OutputFile.maybe_do_quantize, ndarrays_inner, concurrency=concurrency, max_workers=concurrency, use_processpool_executor=True)
+            ndarrays = bounded_parallel_map(
+                OutputFile.maybe_do_quantize,
+                ndarrays_inner,
+                concurrency=concurrency,
+                max_workers=concurrency,
+                use_processpool_executor=True,
+            )
         else:
             ndarrays = map(OutputFile.maybe_do_quantize, ndarrays_inner)
 
         start = time.time()
-        for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)):
+        for i, ((name, lazy_tensor), ndarray) in enumerate(
+            zip(model.items(), ndarrays)
+        ):
             elapsed = time.time() - start
-            size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
+            size = " x ".join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
-            print(f"[{i + 1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}")
+            print(
+                f"[{i + 1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}"
+            )
             of.gguf.write_tensor_data(ndarray)
 
         of.close()
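For context on the `bounded_parallel_map` calls reflowed above: the helper (defined elsewhere in convert.py) streams results while capping how many items are in flight, so the lazy tensors are not all materialized at once. Below is a minimal sketch of the general idea only, assuming a thread pool and in-order results; it is not the script's actual implementation, which can also route work through a process pool via `use_processpool_executor`:

```python
from collections import deque
from concurrent.futures import ThreadPoolExecutor
from typing import Callable, Iterable, Iterator, TypeVar

T = TypeVar("T")
U = TypeVar("U")

def bounded_map(func: Callable[[T], U], items: Iterable[T], concurrency: int) -> Iterator[U]:
    # Keep at most `concurrency` submitted items outstanding; yield in submission order.
    with ThreadPoolExecutor(max_workers=concurrency) as pool:
        pending: deque = deque()
        for item in items:
            pending.append(pool.submit(func, item))
            if len(pending) >= concurrency:
                yield pending.popleft().result()
        while pending:
            yield pending.popleft().result()

print(list(bounded_map(lambda x: x * x, range(5), concurrency=2)))  # [0, 1, 4, 9, 16]
```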