@@ -336,6 +336,39 @@ def load(model_plus: ModelPlus) -> Params:
         return params


+@dataclass
+class Metadata:
+    name: Optional[str] = None
+    author: Optional[str] = None
+    version: Optional[str] = None
+    url: Optional[str] = None
+    description: Optional[str] = None
+    licence: Optional[str] = None
+    source_url: Optional[str] = None
+    source_hf_repo: Optional[str] = None
+
+    @staticmethod
+    def load(metadata_path: Path) -> "Metadata":
+        if metadata_path is None or not metadata_path.exists():
+            return Metadata()
+
+        with open(metadata_path, 'r') as file:
+            data = json.load(file)
+
+        # Create a new Metadata instance
+        metadata = Metadata()
+
+        # Assign values to Metadata attributes if they exist in the JSON file
+        metadata.name = data.get("general.name")
+        metadata.author = data.get("general.author")
+        metadata.version = data.get("general.version")
+        metadata.url = data.get("general.url")
+        metadata.description = data.get("general.description")
+        metadata.licence = data.get("general.license")
+        metadata.source_url = data.get("general.source_url")
+        metadata.source_hf_repo = data.get("general.source_hf_repo")
+
+        return metadata

 #
 # vocab
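
For reference, the new `Metadata.load` expects a flat JSON file whose keys mirror the `general.*` GGUF key names. A minimal sketch of such a file and of loading it; the file name and field values here are hypothetical, not part of this change:

# metadata.json (illustrative contents):
# {
#     "general.name": "TinyLLaMA",
#     "general.version": "v1.0",
#     "general.author": "Jane Doe",
#     "general.license": "apache-2.0"
# }
from pathlib import Path

metadata = Metadata.load(Path("metadata.json"))
print(metadata.name, metadata.version)  # TinyLLaMA v1.0
print(metadata.licence)                 # apache-2.0 (note the attribute's British spelling)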
@@ -1053,21 +1086,41 @@ class OutputFile:
     def __init__(self, fname_out: Path, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE):
         self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)

-    def add_meta_arch(self, params: Params) -> None:
+    def add_meta_model(self, params: Params, metadata: Metadata) -> None:
+        # Metadata about the model and its provenance
         name = "LLaMA"
-
-        # TODO: better logic to determine model name
-        if params.n_ctx == 4096:
-            name = "LLaMA v2"
+        if metadata is not None and metadata.name is not None:
+            name = metadata.name
         elif params.path_model is not None:
-            name = str(params.path_model.parent).split('/')[-1]
-
-        self.gguf.add_name(name)
-        self.gguf.add_vocab_size(params.n_vocab)
-        self.gguf.add_context_length(params.n_ctx)
-        self.gguf.add_embedding_length(params.n_embd)
-        self.gguf.add_block_count(params.n_layer)
-        self.gguf.add_feed_forward_length(params.n_ff)
+            name = str(params.path_model.parent).split("/")[-1]
+        elif params.n_ctx == 4096:
+            # Heuristic detection of LLaMA v2 model
+            name = "LLaMA v2"
+
+        self.gguf.add_name(name)
+
+        if metadata is not None:
+            if metadata.author is not None:
+                self.gguf.add_author(metadata.author)
+            if metadata.version is not None:
+                self.gguf.add_version(metadata.version)
+            if metadata.url is not None:
+                self.gguf.add_url(metadata.url)
+            if metadata.description is not None:
+                self.gguf.add_description(metadata.description)
+            if metadata.licence is not None:
+                self.gguf.add_licence(metadata.licence)
+            if metadata.source_url is not None:
+                self.gguf.add_source_url(metadata.source_url)
+            if metadata.source_hf_repo is not None:
+                self.gguf.add_source_hf_repo(metadata.source_hf_repo)
+
+    def add_meta_arch(self, params: Params) -> None:
+        # Metadata about the neural architecture itself
+        self.gguf.add_context_length(params.n_ctx)
+        self.gguf.add_embedding_length(params.n_embd)
+        self.gguf.add_block_count(params.n_layer)
+        self.gguf.add_feed_forward_length(params.n_ff)
         self.gguf.add_rope_dimension_count(params.n_embd // params.n_head)
         self.gguf.add_head_count(params.n_head)
         self.gguf.add_head_count_kv(params.n_head_kv)
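
`add_meta_model` resolves the model name with an explicit precedence: a name from the metadata file wins, then the model's parent directory name, and the `n_ctx == 4096` LLaMA v2 heuristic only applies as a last resort. A standalone sketch of that precedence chain; the helper name is hypothetical:

def resolve_model_name(metadata: Metadata, params: Params) -> str:
    # Hypothetical helper mirroring the precedence used in add_meta_model above
    if metadata is not None and metadata.name is not None:
        return metadata.name                                 # 1. explicit metadata override
    if params.path_model is not None:
        return str(params.path_model.parent).split("/")[-1]  # 2. model directory name
    if params.n_ctx == 4096:
        return "LLaMA v2"                                    # 3. context-length heuristic
    return "LLaMA"                                           # default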
@@ -1170,13 +1223,14 @@ def close(self) -> None:
     @staticmethod
     def write_vocab_only(
         fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab,
-        endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False,
+        endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False, metadata: Metadata = None,
     ) -> None:
         check_vocab_size(params, vocab, pad_vocab=pad_vocab)

         of = OutputFile(fname_out, endianess=endianess)

         # meta data
+        of.add_meta_model(params, metadata)
         of.add_meta_arch(params)
         of.add_meta_vocab(vocab)
         of.add_meta_special_vocab(svocab)
@@ -1203,12 +1257,14 @@ def write_all(
         fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: BaseVocab, svocab: gguf.SpecialVocab,
         concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
         pad_vocab: bool = False,
+        metadata: Metadata = None,
     ) -> None:
         check_vocab_size(params, vocab, pad_vocab=pad_vocab)

         of = OutputFile(fname_out, endianess=endianess)

         # meta data
+        of.add_meta_model(params, metadata)
         of.add_meta_arch(params)
         if isinstance(vocab, Vocab):
             of.add_meta_vocab(vocab)
@@ -1244,6 +1300,37 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT
     raise ValueError(f"Unexpected combination of types: {name_to_type}")


+def model_parameter_count(model: LazyModel) -> int:
+    total_model_parameters = 0
+    for i, (name, lazy_tensor) in enumerate(model.items()):
+        sum_weights_in_tensor = 1
+        for dim in lazy_tensor.shape:
+            sum_weights_in_tensor *= dim
+        total_model_parameters += sum_weights_in_tensor
+    return total_model_parameters
+
+
+def model_parameter_count_rounded_notation(model_params_count: int) -> str:
+    if model_params_count > 1e12:
+        # Trillions of parameters
+        scaled_model_params = model_params_count * 1e-12
+        scale_suffix = "T"
+    elif model_params_count > 1e9:
+        # Billions of parameters
+        scaled_model_params = model_params_count * 1e-9
+        scale_suffix = "B"
+    elif model_params_count > 1e6:
+        # Millions of parameters
+        scaled_model_params = model_params_count * 1e-6
+        scale_suffix = "M"
+    else:
+        # Thousands of parameters
+        scaled_model_params = model_params_count * 1e-3
+        scale_suffix = "K"
+
+    return f"{round(scaled_model_params)}{scale_suffix}"
+
+
 def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
     return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
             for (name, tensor) in model.items()}
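
`model_parameter_count` multiplies out each tensor's shape and sums the products; `model_parameter_count_rounded_notation` then scales that total to a K/M/B/T suffix with `round`. A few sanity checks (the counts are illustrative); note that the strict `>` comparisons put a count of exactly 1e9 in the "M" branch:

assert model_parameter_count_rounded_notation(6_738_415_616) == "7B"     # ~6.7e9 rounds up to 7B
assert model_parameter_count_rounded_notation(124_439_808)   == "124M"
assert model_parameter_count_rounded_notation(3_000)         == "3K"
assert model_parameter_count_rounded_notation(1_000_000_000) == "1000M"  # strict '>' edge case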
@@ -1423,13 +1510,26 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) ->
         return vocab, special_vocab


-def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path:
-    namestr = {
-        GGMLFileType.AllF32:     "f32",
-        GGMLFileType.MostlyF16:  "f16",
-        GGMLFileType.MostlyQ8_0: "q8_0",
+def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> Path:
+    quantization = {
+        GGMLFileType.AllF32:     "F32",
+        GGMLFileType.MostlyF16:  "F16",
+        GGMLFileType.MostlyQ8_0: "Q8_0",
     }[file_type]
-    ret = model_paths[0].parent / f"ggml-model-{namestr}.gguf"
+
+    parameters = model_parameter_count_rounded_notation(model_params_count)
+
+    version = ""
+    if metadata is not None and metadata.version is not None:
+        version = f"-{metadata.version}"
+
+    name = "ggml-model"
+    if metadata is not None and metadata.name is not None:
+        name = metadata.name
+    elif params.path_model is not None:
+        name = params.path_model.name
+
+    ret = model_paths[0].parent / f"{name}{version}-{parameters}-{quantization}.gguf"
     if ret in model_paths:
         sys.stderr.write(
             f"Error: Default output path ({ret}) would overwrite the input. "
@@ -1466,8 +1566,12 @@ def main(args_in: list[str] | None = None) -> None:
     parser.add_argument("--big-endian",   action="store_true",    help="model is executed on big endian machine")
     parser.add_argument("--pad-vocab",    action="store_true",    help="add pad tokens when model vocab expects more than tokenizer metadata provides")
     parser.add_argument("--skip-unknown", action="store_true",    help="skip unknown tensor names instead of failing")
+    parser.add_argument("--metadata",     type=Path,              help="Specify the path for a metadata file")

     args = parser.parse_args(args_in)
+
+    metadata = Metadata.load(args.metadata)
+
     if args.no_vocab and args.vocab_only:
         raise ValueError("--vocab-only does not make sense with --no-vocab")
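
End to end, a conversion run can now attach the metadata file from the command line; a hypothetical invocation (paths illustrative):

python convert.py models/tinyllama/ --outtype f16 --metadata metadata.json

When `--metadata` is omitted, `Metadata.load(None)` returns an empty `Metadata`, so the optional `add_author`/`add_version`/... calls in `add_meta_model` are all skipped and the written metadata is unchanged from before.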
@@ -1481,6 +1585,9 @@ def main(args_in: list[str] | None = None) -> None:
     else:
         model_plus = ModelPlus(model={}, paths=[args.model / 'dummy'], format='none', vocab=None)

+    model_params_count = model_parameter_count(model_plus.model)
+    print(f"model parameters count : {model_params_count} ({model_parameter_count_rounded_notation(model_params_count)})")
+
     if args.dump:
         do_dump_model(model_plus)
         return
@@ -1520,27 +1627,30 @@ def main(args_in: list[str] | None = None) -> None:
             raise ValueError("need --outfile if using --vocab-only")
         outfile = args.outfile
         OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
-                                    endianess=endianess, pad_vocab=args.pad_vocab)
+                                    endianess=endianess, pad_vocab=args.pad_vocab, metadata=metadata)
         print(f"Wrote {outfile}")
         return

     if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab:
         vocab = model_plus.vocab

     print(f"Vocab info: {vocab}")
-    print(f"Special vocab info: {special_vocab}")
+    special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent,
+                                      load_merges=True,
+                                      n_vocab=vocab.vocab_size)

+    print(f"Special vocab info: {special_vocab}")
     model = model_plus.model
     model = convert_model_names(model, params, args.skip_unknown)
     ftype = pick_output_type(model, args.outtype)
     model = convert_to_output_type(model, ftype)
-    outfile = args.outfile or default_outfile(model_plus.paths, ftype)
+    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params, model_params_count, metadata)

     params.ftype = ftype
     print(f"Writing {outfile}, format {ftype}")

     OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
-                         concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab)
+                         concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab, metadata=metadata)
     print(f"Wrote {outfile}")