Skip to content

Commit da064a8

Browse files
committed
convert.py: Outfile default name change and additional metadata support
1 parent a307375 commit da064a8

File tree

4 files changed

+143
-24
lines changed

4 files changed

+143
-24
lines changed

convert.py

Lines changed: 134 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,39 @@ def load(model_plus: ModelPlus) -> Params:
336336

337337
return params
338338

339+
@dataclass
class Metadata:
    """Optional model provenance metadata, read from a user-supplied JSON
    file (--metadata) and written into the GGUF header as 'general.*' keys.

    All fields default to None, meaning "not provided" — absent fields are
    simply not written to the output file.
    """
    name: Optional[str] = None
    author: Optional[str] = None
    version: Optional[str] = None
    url: Optional[str] = None
    description: Optional[str] = None
    licence: Optional[str] = None
    source_url: Optional[str] = None
    source_hf_repo: Optional[str] = None

    @staticmethod
    def load(metadata_path: Path) -> "Metadata":
        """Load metadata from a JSON file.

        Returns an empty Metadata (all fields None) when metadata_path is
        None or the file does not exist, so callers never need a None check.
        """
        if metadata_path is None or not metadata_path.exists():
            return Metadata()

        with open(metadata_path, 'r') as file:
            data = json.load(file)

        # Create a new Metadata instance
        metadata = Metadata()

        # Assigning values to Metadata attributes if they exist in the JSON file
        metadata.name = data.get("general.name")
        metadata.author = data.get("general.author")
        metadata.version = data.get("general.version")
        metadata.url = data.get("general.url")
        metadata.description = data.get("general.description")
        # Fix: assign the declared 'licence' field. The original assigned
        # 'metadata.license', which silently created a stray attribute and
        # left 'licence' (the field add_meta_model reads) as None, so the
        # licence from the JSON file was never written to the GGUF output.
        metadata.licence = data.get("general.license")
        metadata.source_url = data.get("general.source_url")
        metadata.source_hf_repo = data.get("general.source_hf_repo")

        return metadata
339372

340373
#
341374
# vocab
@@ -1053,21 +1086,41 @@ class OutputFile:
10531086
def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE):
10541087
self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
10551088

1056-
def add_meta_arch(self, params: Params) -> None:
1089+
def add_meta_model(self, params: Params, metadata: Metadata) -> None:
    """Write model-identity metadata (name, author, licence, provenance)
    into the GGUF header.

    Architecture hyperparameters are written separately by add_meta_arch().
    metadata may be None; only fields that are present are emitted.
    """
    # Name resolution priority: explicit metadata > model path > heuristic.
    name = "LLaMA"
    if metadata is not None and metadata.name is not None:
        name = metadata.name
    elif params.path_model is not None:
        # Fix: use Path.name instead of splitting str(path) on "/", which
        # breaks on Windows where the separator is "\\".
        name = params.path_model.parent.name
    elif params.n_ctx == 4096:
        # Heuristic detection of LLaMA v2 model
        name = "LLaMA v2"

    self.gguf.add_name(name)

    # Optional provenance fields: written only when supplied by the
    # metadata file so absent keys never appear in the output.
    if metadata is not None:
        if metadata.author is not None:
            self.gguf.add_author(metadata.author)
        if metadata.version is not None:
            self.gguf.add_version(metadata.version)
        if metadata.url is not None:
            self.gguf.add_url(metadata.url)
        if metadata.description is not None:
            self.gguf.add_description(metadata.description)
        if metadata.licence is not None:
            self.gguf.add_licence(metadata.licence)
        if metadata.source_url is not None:
            self.gguf.add_source_url(metadata.source_url)
        if metadata.source_hf_repo is not None:
            self.gguf.add_source_hf_repo(metadata.source_hf_repo)
1118+
def add_meta_arch(self, params: Params) -> None:
1119+
# Metadata About The Neural Architecture Itself
1120+
self.gguf.add_context_length(params.n_ctx)
1121+
self.gguf.add_embedding_length(params.n_embd)
1122+
self.gguf.add_block_count(params.n_layer)
1123+
self.gguf.add_feed_forward_length(params.n_ff)
10711124
self.gguf.add_rope_dimension_count(params.n_embd // params.n_head)
10721125
self.gguf.add_head_count (params.n_head)
10731126
self.gguf.add_head_count_kv (params.n_head_kv)
@@ -1170,13 +1223,14 @@ def close(self) -> None:
11701223
@staticmethod
11711224
def write_vocab_only(
11721225
fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab,
1173-
endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False,
1226+
endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False, metadata: Metadata = None,
11741227
) -> None:
11751228
check_vocab_size(params, vocab, pad_vocab=pad_vocab)
11761229

11771230
of = OutputFile(fname_out, endianess=endianess)
11781231

11791232
# meta data
1233+
of.add_meta_model(params, metadata)
11801234
of.add_meta_arch(params)
11811235
of.add_meta_vocab(vocab)
11821236
of.add_meta_special_vocab(svocab)
@@ -1203,12 +1257,14 @@ def write_all(
12031257
fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: BaseVocab, svocab: gguf.SpecialVocab,
12041258
concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
12051259
pad_vocab: bool = False,
1260+
metadata: Metadata = None,
12061261
) -> None:
12071262
check_vocab_size(params, vocab, pad_vocab=pad_vocab)
12081263

12091264
of = OutputFile(fname_out, endianess=endianess)
12101265

12111266
# meta data
1267+
of.add_meta_model(params, metadata)
12121268
of.add_meta_arch(params)
12131269
if isinstance(vocab, Vocab):
12141270
of.add_meta_vocab(vocab)
@@ -1244,6 +1300,37 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT
12441300
raise ValueError(f"Unexpected combination of types: {name_to_type}")
12451301

12461302

1303+
def model_parameter_count(model: "LazyModel") -> int:
    """Return the total number of weights across all tensors in the model.

    model maps tensor names to lazy tensors; only each tensor's .shape is
    read, so nothing is materialized.
    """
    total_model_parameters = 0
    # Fix: the original used `for i, (name, lazy_tensor) in enumerate(...)`
    # but never used `i` or `name`; iterate the values directly.
    for lazy_tensor in model.values():
        # Element count of a tensor is the product of its dimensions.
        sum_weights_in_tensor = 1
        for dim in lazy_tensor.shape:
            sum_weights_in_tensor *= dim
        total_model_parameters += sum_weights_in_tensor
    return total_model_parameters
1311+
1312+
1313+
def model_parameter_count_rounded_notation(model_params_count: int) -> str:
    """Return a human-readable approximate size such as '7B' or '13M'.

    The count is scaled to the largest unit it strictly exceeds
    (T/B/M, falling back to K) and rounded to the nearest integer.
    """
    scales = (
        (1e12, 1e-12, "T"),  # Trillions Of Parameters
        (1e9, 1e-9, "B"),    # Billions Of Parameters
        (1e6, 1e-6, "M"),    # Millions Of Parameters
    )
    for threshold, factor, scale_suffix in scales:
        if model_params_count > threshold:
            return f"{round(model_params_count * factor)}{scale_suffix}"
    # Thousands Of Parameters
    return f"{round(model_params_count * 1e-3)}K"
1332+
1333+
12471334
def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
12481335
return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
12491336
for (name, tensor) in model.items()}
@@ -1423,13 +1510,26 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) ->
14231510
return vocab, special_vocab
14241511

14251512

1426-
def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path:
1427-
namestr = {
1428-
GGMLFileType.AllF32: "f32",
1429-
GGMLFileType.MostlyF16: "f16",
1430-
GGMLFileType.MostlyQ8_0:"q8_0",
1513+
def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> Path:
1514+
quantization = {
1515+
GGMLFileType.AllF32: "F32",
1516+
GGMLFileType.MostlyF16: "F16",
1517+
GGMLFileType.MostlyQ8_0: "Q8_0",
14311518
}[file_type]
1432-
ret = model_paths[0].parent / f"ggml-model-{namestr}.gguf"
1519+
1520+
parameters = model_parameter_count_rounded_notation(model_params_count)
1521+
1522+
version = ""
1523+
if metadata is not None and metadata.version is not None:
1524+
version = f"-{metadata.version}"
1525+
1526+
name = "ggml-model"
1527+
if metadata is not None and metadata.name is not None:
1528+
name = metadata.name
1529+
elif params.path_model is not None:
1530+
name = params.path_model.name
1531+
1532+
ret = model_paths[0].parent / f"{name}{version}-{parameters}-{quantization}.gguf"
14331533
if ret in model_paths:
14341534
sys.stderr.write(
14351535
f"Error: Default output path ({ret}) would overwrite the input. "
@@ -1466,8 +1566,12 @@ def main(args_in: list[str] | None = None) -> None:
14661566
parser.add_argument("--big-endian", action="store_true", help="model is executed on big endian machine")
14671567
parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
14681568
parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
1569+
parser.add_argument("--metadata", type=Path, help="Specify the path for a metadata file")
14691570

14701571
args = parser.parse_args(args_in)
1572+
1573+
metadata = Metadata.load(args.metadata)
1574+
14711575
if args.no_vocab and args.vocab_only:
14721576
raise ValueError("--vocab-only does not make sense with --no-vocab")
14731577

@@ -1481,6 +1585,9 @@ def main(args_in: list[str] | None = None) -> None:
14811585
else:
14821586
model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)
14831587

1588+
model_params_count = model_parameter_count(model_plus.model)
1589+
print(f"model parameters count : {model_params_count} ({model_parameter_count_rounded_notation(model_params_count)})")
1590+
14841591
if args.dump:
14851592
do_dump_model(model_plus)
14861593
return
@@ -1520,27 +1627,30 @@ def main(args_in: list[str] | None = None) -> None:
15201627
raise ValueError("need --outfile if using --vocab-only")
15211628
outfile = args.outfile
15221629
OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
1523-
endianess=endianess, pad_vocab=args.pad_vocab)
1630+
endianess=endianess, pad_vocab=args.pad_vocab, metadata=metadata)
15241631
print(f"Wrote {outfile}")
15251632
return
15261633

15271634
if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab:
15281635
vocab = model_plus.vocab
15291636

15301637
print(f"Vocab info: {vocab}")
1531-
print(f"Special vocab info: {special_vocab}")
1638+
special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent,
1639+
load_merges = True,
1640+
n_vocab = vocab.vocab_size)
15321641

1642+
print(f"Special vocab info: {special_vocab}")
15331643
model = model_plus.model
15341644
model = convert_model_names(model, params, args.skip_unknown)
15351645
ftype = pick_output_type(model, args.outtype)
15361646
model = convert_to_output_type(model, ftype)
1537-
outfile = args.outfile or default_outfile(model_plus.paths, ftype)
1647+
outfile = args.outfile or default_outfile(model_plus.paths, ftype, params, model_params_count, metadata)
15381648

15391649
params.ftype = ftype
15401650
print(f"Writing {outfile}, format {ftype}")
15411651

15421652
OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
1543-
concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab)
1653+
concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab, metadata=metadata)
15441654
print(f"Wrote {outfile}")
15451655

15461656

gguf-py/gguf/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class General:
2424
ALIGNMENT = "general.alignment"
2525
NAME = "general.name"
2626
AUTHOR = "general.author"
27+
VERSION = "general.version"
2728
URL = "general.url"
2829
DESCRIPTION = "general.description"
2930
LICENSE = "general.license"

gguf-py/gguf/gguf_writer.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,9 @@ def add_architecture(self) -> None:
296296
def add_author(self, author: str) -> None:
297297
self.add_string(Keys.General.AUTHOR, author)
298298

299+
def add_version(self, version: str) -> None:
    """Write the model version string under the 'general.version' GGUF key."""
    self.add_string(Keys.General.VERSION, version)
301+
299302
def add_tensor_data_layout(self, layout: str) -> None:
300303
self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
301304

@@ -305,6 +308,9 @@ def add_url(self, url: str) -> None:
305308
def add_description(self, description: str) -> None:
306309
self.add_string(Keys.General.DESCRIPTION, description)
307310

311+
def add_licence(self, licence: str) -> None:
    """Write the licence string under the 'general.license' GGUF key.

    NOTE: the method name uses the British spelling 'licence' while the
    key constant uses 'LICENSE' — both refer to the same GGUF key.
    """
    self.add_string(Keys.General.LICENSE, licence)
313+
308314
def add_source_url(self, url: str) -> None:
309315
self.add_string(Keys.General.SOURCE_URL, url)
310316

llama.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ enum llm_kv {
261261
LLM_KV_GENERAL_ALIGNMENT,
262262
LLM_KV_GENERAL_NAME,
263263
LLM_KV_GENERAL_AUTHOR,
264+
LLM_KV_GENERAL_VERSION,
264265
LLM_KV_GENERAL_URL,
265266
LLM_KV_GENERAL_DESCRIPTION,
266267
LLM_KV_GENERAL_LICENSE,
@@ -330,6 +331,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
330331
{ LLM_KV_GENERAL_ALIGNMENT, "general.alignment" },
331332
{ LLM_KV_GENERAL_NAME, "general.name" },
332333
{ LLM_KV_GENERAL_AUTHOR, "general.author" },
334+
{ LLM_KV_GENERAL_VERSION, "general.version" },
333335
{ LLM_KV_GENERAL_URL, "general.url" },
334336
{ LLM_KV_GENERAL_DESCRIPTION, "general.description" },
335337
{ LLM_KV_GENERAL_LICENSE, "general.license" },

0 commit comments

Comments
 (0)