@@ -1020,18 +1020,28 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT
1020
1020
raise ValueError (f"Unexpected combination of types: { name_to_type } " )
1021
1021
1022
1022
1023
def per_model_weight_count_estimation(model: LazyModel, expert_count: int | None = None) -> int:
    """Estimate how many weights belong to a single (per-expert) model.

    The volume (product of all dimensions) of every tensor in ``model`` is
    summed, skipping tensors whose names end in one of the excluded suffixes.
    When ``expert_count`` is a positive integer the total is divided by it to
    give a per-expert figure; otherwise the plain total is returned.

    Args:
        model: Mapping of tensor name to an object exposing a ``shape``
            sequence of integer dimensions.
        expert_count: Number of experts in the model. ``None``, zero, or a
            negative value means "no experts" and disables the division.

    Returns:
        The estimated weight count as an ``int``. Floor division is used so
        the declared return type holds even when the total is not an exact
        multiple of ``expert_count``.
    """
    # TODO: Ensure parameter count is accurate throughout various model type
    skipped_suffixes = (".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")

    sum_weight_estimate = 0
    for name, lazy_tensor in model.items():
        # We don't need these
        if name.endswith(skipped_suffixes):
            continue

        # Tensor volume: product of every dimension (stays 1 for an empty shape)
        sum_weights_in_tensor = 1
        for dim in lazy_tensor.shape:
            sum_weights_in_tensor *= dim

        # Add tensor volume to running count
        sum_weight_estimate += sum_weights_in_tensor

    # Callers pass params.n_experts, which may be None; comparing None with 0
    # would raise TypeError, so it is treated the same as "no experts".
    if expert_count is None or expert_count <= 0:
        return sum_weight_estimate

    # Floor division keeps the result an int, as the annotation promises.
    return sum_weight_estimate // expert_count
1035
1045
1036
1046
1037
1047
def convert_to_output_type (model : LazyModel , output_type : GGMLFileType ) -> LazyModel :
@@ -1213,18 +1223,10 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) ->
1213
1223
return vocab , special_vocab
1214
1224
1215
1225
1216
- def default_convention_outfile (file_type : GGMLFileType , params : Params , model_params_count : int , metadata : Metadata ) -> str :
1217
-
1218
- name = None
1219
- if metadata is not None and metadata .name is not None :
1220
- name = metadata .name
1221
- elif params .path_model is not None :
1222
- name = params .path_model .name
1223
-
1226
+ def default_convention_outfile (file_type : GGMLFileType , model_name :str , expert_count :int , model_params_count : int , metadata : Metadata ) -> str :
1227
+ name = metadata .name if metadata is not None and metadata .name is not None else model_name
1224
1228
version = metadata .version if metadata is not None and metadata .version is not None else None
1225
1229
1226
- expert_count = params .n_experts if params .n_experts is not None else None
1227
-
1228
1230
encodingScheme = {
1229
1231
GGMLFileType .AllF32 : "F32" ,
1230
1232
GGMLFileType .MostlyF16 : "F16" ,
@@ -1234,8 +1236,8 @@ def default_convention_outfile(file_type: GGMLFileType, params: Params, model_pa
1234
1236
return gguf .naming_convention (name , version , expert_count , model_params_count , encodingScheme )
1235
1237
1236
1238
1237
- def default_outfile (model_paths : list [Path ], file_type : GGMLFileType , params : Params , model_params_count : int , metadata : Metadata ) -> Path :
1238
- default_filename = default_convention_outfile (file_type , params , model_params_count , metadata )
1239
+ def default_outfile (model_paths : list [Path ], file_type : GGMLFileType , model_name : str , expert_count : int , model_params_count : int , metadata : Metadata ) -> Path :
1240
+ default_filename = default_convention_outfile (file_type , model_name , expert_count , model_params_count , metadata )
1239
1241
ret = model_paths [0 ].parent / f"{ default_filename } .gguf"
1240
1242
if ret in model_paths :
1241
1243
logger .error (
@@ -1293,9 +1295,9 @@ def main(args_in: list[str] | None = None) -> None:
1293
1295
model_plus = load_some_model (args .model )
1294
1296
params = Params .load (model_plus )
1295
1297
model = convert_model_names (model_plus .model , params , args .skip_unknown )
1296
- model_params_count = model_parameter_count (model_plus .model )
1298
+ model_params_count = per_model_weight_count_estimation (model_plus .model , params . n_experts )
1297
1299
ftype = pick_output_type (model , args .outtype )
1298
- print (f"{ default_convention_outfile (ftype , params , model_params_count , metadata )} " ) # noqa: NP100
1300
+ print (f"{ default_convention_outfile (ftype , params . path_model . name , params . n_experts , model_params_count , metadata )} " ) # noqa: NP100
1299
1301
return
1300
1302
1301
1303
if args .no_vocab and args .vocab_only :
@@ -1311,8 +1313,8 @@ def main(args_in: list[str] | None = None) -> None:
1311
1313
else :
1312
1314
model_plus = ModelPlus (model = {}, paths = [args .model / 'dummy' ], format = 'none' , vocab = None )
1313
1315
1314
- model_params_count = model_parameter_count (model_plus .model )
1315
- logger .info (f"model parameters count : { model_params_count } ({ gguf .model_parameter_count_rounded_notation (model_params_count )} )" )
1316
+ model_params_count = per_model_weight_count_estimation (model_plus .model , params . n_experts )
1317
+ logger .info (f"model parameters count : { model_params_count } ({ gguf .model_weight_count_rounded_notation (model_params_count )} )" )
1316
1318
1317
1319
if args .dump :
1318
1320
do_dump_model (model_plus )
@@ -1380,7 +1382,7 @@ def main(args_in: list[str] | None = None) -> None:
1380
1382
model = convert_model_names (model , params , args .skip_unknown )
1381
1383
ftype = pick_output_type (model , args .outtype )
1382
1384
model = convert_to_output_type (model , ftype )
1383
- outfile = args .outfile or default_outfile (model_plus .paths , ftype , params , model_params_count , metadata )
1385
+ outfile = args .outfile or default_outfile (model_plus .paths , ftype , params . path_model . name , params . n_experts , model_params_count , metadata )
1384
1386
1385
1387
params .ftype = ftype
1386
1388
logger .info (f"Writing { outfile } , format { ftype } " )
0 commit comments