Commit 7cae773

convert-*.py: refactor to per_model_weight_count_estimation()

1 parent: 0d0a5fe

3 files changed: +58, -49 lines

convert-hf-to-gguf.py
Lines changed: 32 additions & 24 deletions

@@ -121,9 +121,12 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
         self.tensor_names = None
         self.metadata = metadata
+
+        model_tensors = self.get_tensors()
+
         if self.ftype == gguf.LlamaFileType.GUESSED:
             # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
-            _, first_tensor = next(self.get_tensors())
+            _, first_tensor = next(model_tensors)
             if first_tensor.dtype == torch.float16:
                 logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})")
                 self.ftype = gguf.LlamaFileType.MOSTLY_F16
@@ -160,8 +163,35 @@ def get_model_name(metadata, huggingface_parameters, dir_model, model_arch):
         # Get Expert Count From huggingface_parameters
         expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None

+        def per_model_weight_count_estimation(tensors, expert_count):
+            # TODO: Ensure parameter count is accurate throughout various model type
+            # May currently overestimate parameter count in Mamba model because
+            # output weights is tied with token embeddings.
+            sum_weight_estimate = 0
+            for name, data_torch in tensors:
+                # Got A Tensor
+
+                # We don't need these
+                if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
+                    continue
+
+                # Calculate Tensor Volume
+                sum_weights_in_tensor = 1
+                for dim in data_torch.shape:
+                    sum_weights_in_tensor *= dim
+
+                # Add Tensor Volume To Running Count
+                sum_weight_estimate += sum_weights_in_tensor
+
+            # Calculate weight estimate per model
+            per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate
+
+            return per_model_weight_estimate
+
+        weight_estimate = per_model_weight_count_estimation(model_tensors, expert_count)
+
         # Generate default filename based on model specification and available metadata
-        self.fname_default = gguf.naming_convention(self.model_name, self.metadata.version, expert_count, self.parameter_count(), encodingScheme)
+        self.fname_default = gguf.naming_convention(self.model_name, self.metadata.version, expert_count, weight_estimate, encodingScheme)

         # Filename Output
         if fname_out is not None:
@@ -343,28 +373,6 @@ def extra_f16_tensors(self, name: str, new_name: str, bid: int | None, n_dims: i

         return False

-    def parameter_count(self):
-        # TODO: Ensure parameter count is accurate throughout various model type
-        # May currently overestimate parameter count in Mamba model because
-        # output weights is tied with token embeddings.
-        total_model_parameters = 0
-        for name, data_torch in self.get_tensors():
-            # Got A Tensor
-
-            # We don't need these
-            if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
-                continue
-
-            # Calculate Tensor Volume
-            sum_weights_in_tensor = 1
-            for dim in data_torch.shape:
-                sum_weights_in_tensor *= dim
-
-            # Add Tensor Volume To Running Count
-            total_model_parameters += sum_weights_in_tensor
-
-        return total_model_parameters
-
     def write_tensors(self):
         max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,")

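As a sanity check on what the new nested helper computes, here is a minimal runnable sketch of the same estimation logic; it takes (name, shape) pairs instead of torch tensors, and the tensor names and shapes below are invented for illustration:

# Standalone sketch of the per_model_weight_count_estimation() logic;
# shapes are plain tuples so the example runs without torch installed.
def estimate_weight_count(tensors, expert_count):
    sum_weight_estimate = 0
    for name, shape in tensors:
        # Skip attention-bias/rotary buffers, as the converter does
        if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
            continue
        # Tensor volume is the product of its dimensions
        sum_weights_in_tensor = 1
        for dim in shape:
            sum_weights_in_tensor *= dim
        sum_weight_estimate += sum_weights_in_tensor
    # For MoE checkpoints, report per-expert weights; also guard None,
    # since expert_count defaults to None when hparams lacks num_local_experts
    if expert_count is not None and expert_count > 0:
        return sum_weight_estimate / expert_count
    return sum_weight_estimate

tensors = [
    ("token_embd.weight", (32000, 4096)),   # 131,072,000 weights
    ("blk.0.attn_q.weight", (4096, 4096)),  #  16,777,216 weights
    ("blk.0.attention.bias", (4096,)),      # skipped
]
print(estimate_weight_count(tensors, expert_count=None))  # 147849216

The extra None guard is an assumption of this sketch; the commit's helper compares expert_count > 0 directly.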
examples/convert-legacy-llama.py
Lines changed: 23 additions & 21 deletions

@@ -1020,18 +1020,28 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT
     raise ValueError(f"Unexpected combination of types: {name_to_type}")


-def model_parameter_count(model: LazyModel) -> int:
+def per_model_weight_count_estimation(model: LazyModel, expert_count:int) -> int:
     # TODO: Ensure parameter count is accurate throughout various model type
-    total_model_parameters = 0
+    sum_weight_estimate = 0
     for name, lazy_tensor in model.items():
+        # We don't need these
+        if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
+            continue
+
         # Got A Tensor
         sum_weights_in_tensor = 1
+
         # Tensor Volume
         for dim in lazy_tensor.shape:
             sum_weights_in_tensor *= dim
+
         # Add Tensor Volume To Running Count
-        total_model_parameters += sum_weights_in_tensor
-    return total_model_parameters
+        sum_weight_estimate += sum_weights_in_tensor
+
+    # Calculate weight estimate per model
+    per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate
+
+    return per_model_weight_estimate


def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
@@ -1213,18 +1223,10 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) ->
         return vocab, special_vocab


-def default_convention_outfile(file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> str:
-
-    name = None
-    if metadata is not None and metadata.name is not None:
-        name = metadata.name
-    elif params.path_model is not None:
-        name = params.path_model.name
-
+def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: Metadata) -> str:
+    name = metadata.name if metadata is not None and metadata.name is not None else model_name
     version = metadata.version if metadata is not None and metadata.version is not None else None

-    expert_count = params.n_experts if params.n_experts is not None else None
-
     encodingScheme = {
         GGMLFileType.AllF32: "F32",
         GGMLFileType.MostlyF16: "F16",
@@ -1234,8 +1236,8 @@ def default_convention_outfile(file_type: GGMLFileType, params: Params, model_pa
     return gguf.naming_convention(name, version, expert_count, model_params_count, encodingScheme)


-def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> Path:
-    default_filename = default_convention_outfile(file_type, params, model_params_count, metadata)
+def default_outfile(model_paths: list[Path], file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: Metadata) -> Path:
+    default_filename = default_convention_outfile(file_type, model_name, expert_count, model_params_count, metadata)
     ret = model_paths[0].parent / f"{default_filename}.gguf"
     if ret in model_paths:
         logger.error(
@@ -1293,9 +1295,9 @@ def main(args_in: list[str] | None = None) -> None:
         model_plus = load_some_model(args.model)
         params = Params.load(model_plus)
         model = convert_model_names(model_plus.model, params, args.skip_unknown)
-        model_params_count = model_parameter_count(model_plus.model)
+        model_params_count = per_model_weight_count_estimation(model_plus.model, params.n_experts)
         ftype = pick_output_type(model, args.outtype)
-        print(f"{default_convention_outfile(ftype, params, model_params_count, metadata)}") # noqa: NP100
+        print(f"{default_convention_outfile(ftype, params.path_model.name, params.n_experts, model_params_count, metadata)}") # noqa: NP100
         return

     if args.no_vocab and args.vocab_only:
@@ -1311,8 +1313,8 @@ def main(args_in: list[str] | None = None) -> None:
     else:
         model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)

-    model_params_count = model_parameter_count(model_plus.model)
-    logger.info(f"model parameters count : {model_params_count} ({gguf.model_parameter_count_rounded_notation(model_params_count)})")
+    model_params_count = per_model_weight_count_estimation(model_plus.model, params.n_experts)
+    logger.info(f"model parameters count : {model_params_count} ({gguf.model_weight_count_rounded_notation(model_params_count)})")

     if args.dump:
         do_dump_model(model_plus)
@@ -1380,7 +1382,7 @@ def main(args_in: list[str] | None = None) -> None:
     model = convert_model_names(model, params, args.skip_unknown)
     ftype = pick_output_type(model, args.outtype)
     model = convert_to_output_type(model, ftype)
-    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params, model_params_count, metadata)
+    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.path_model.name, params.n_experts, model_params_count, metadata)

     params.ftype = ftype
     logger.info(f"Writing {outfile}, format {ftype}")

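To make the per-expert division concrete, a short illustrative calculation (the weight total and expert count are invented, not read from a real checkpoint):

# Illustrative only: how the expert_count argument changes the reported size.
total_weights = 46_700_000_000  # sum over all tensors of an 8-expert model
expert_count = 8

per_model = total_weights / expert_count if expert_count > 0 else total_weights
print(f"{per_model * 1e-9:.1f}B weights per expert")  # 5.8B weights per expert

The filename chunk then reads roughly "8x5.8B" instead of a misleading "46.7B", with the exact digits depending on how model_weight_count_rounded_notation rounds.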
gguf-py/gguf/utility.py
Lines changed: 3 additions & 4 deletions

@@ -1,7 +1,6 @@
 from __future__ import annotations

-
-def model_parameter_count_rounded_notation(model_params_count: int) -> str:
+def model_weight_count_rounded_notation(model_params_count: int) -> str:
     if model_params_count > 1e15 :
         # Quadrillion Of Parameters
         scaled_model_params = model_params_count * 1e-15
@@ -29,7 +28,7 @@ def naming_convention(model_name: str, version_string:str, expert_count_int:int,
     # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
     name = model_name.strip().replace(' ', '-') if model_name is not None else "ggml-model"
     version = f"-{version_string}" if version_string is not None else ""
-    expert_count_chunk = f"{expert_count_int}x" if expert_count_int is not None else ""
-    parameters = model_parameter_count_rounded_notation(model_params_count)
+    expert_count_chunk = f"{expert_count_int}x" if expert_count_int is not None and expert_count_int > 0 else ""
+    parameters = model_weight_count_rounded_notation(model_params_count)
     encodingScheme = encodingScheme.upper()
     return f"{name}{version}-{expert_count_chunk}{parameters}-{encodingScheme}"

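For the utility changes, a standalone sketch of the guarded expert-count chunk in naming_convention; the build_name wrapper is hypothetical, and only the two changed lines mirror the diff:

# Mini reimplementation of the name assembly, for illustration only.
def build_name(model_name, version_string, expert_count_int, parameters_str, encoding_scheme):
    name = model_name.strip().replace(' ', '-') if model_name is not None else "ggml-model"
    version = f"-{version_string}" if version_string is not None else ""
    # New guard: an expert count of 0 no longer produces a bogus "0x" chunk
    expert_count_chunk = f"{expert_count_int}x" if expert_count_int is not None and expert_count_int > 0 else ""
    return f"{name}{version}-{expert_count_chunk}{parameters_str}-{encoding_scheme.upper()}"

print(build_name("Mixtral", "v0.1", 8, "7B", "f16"))    # Mixtral-v0.1-8x7B-F16
print(build_name("TinyLlama", None, 0, "1.1B", "f16"))  # TinyLlama-1.1B-F16

Here parameters_str stands in for the output of model_weight_count_rounded_notation(), whose exact formatting is outside this diff.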