Skip to content

Commit dae4a38

Browse files
authored
convert-hf : set the model name based on cli arg, if present
1 parent e141ce6 commit dae4a38

File tree

1 file changed

+14
-12
lines changed

convert-hf-to-gguf.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ class Model:
5151
endianess: gguf.GGUFEndian
5252
use_temp_file: bool
5353
lazy: bool
54+
model_name: str | None
5455
part_names: list[str]
5556
is_safetensors: bool
5657
hparams: dict[str, Any]
@@ -63,7 +64,7 @@ class Model:
6364
# subclasses should define this!
6465
model_arch: gguf.MODEL_ARCH
6566

66-
def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool):
67+
def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, model_name: str | None):
6768
if type(self) is Model:
6869
raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
6970
self.dir_model = dir_model
@@ -72,6 +73,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
7273
self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
7374
self.use_temp_file = use_temp_file
7475
self.lazy = not eager
76+
self.model_name = model_name
7577
self.part_names = Model.get_model_part_names(self.dir_model, ".safetensors")
7678
self.is_safetensors = len(self.part_names) > 0
7779
if not self.is_safetensors:
@@ -181,7 +183,7 @@ def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", "
181183
return new_name
182184

183185
def set_gguf_parameters(self):
184-
self.gguf_writer.add_name(self.dir_model.name)
186+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
185187
self.gguf_writer.add_block_count(self.block_count)
186188

187189
if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None:
@@ -661,7 +663,7 @@ class GPTNeoXModel(Model):
661663
def set_gguf_parameters(self):
662664
block_count = self.hparams["num_hidden_layers"]
663665

664-
self.gguf_writer.add_name(self.dir_model.name)
666+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
665667
self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
666668
self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
667669
self.gguf_writer.add_block_count(block_count)
@@ -794,7 +796,7 @@ def set_vocab(self):
794796

795797
def set_gguf_parameters(self):
796798
block_count = self.hparams["n_layers"]
797-
self.gguf_writer.add_name(self.dir_model.name)
799+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
798800
self.gguf_writer.add_context_length(self.hparams["max_seq_len"])
799801
self.gguf_writer.add_embedding_length(self.hparams["d_model"])
800802
self.gguf_writer.add_block_count(block_count)
@@ -846,7 +848,7 @@ def set_gguf_parameters(self):
846848
raise ValueError("gguf: can not find ctx length parameter.")
847849

848850
self.gguf_writer.add_file_type(self.ftype)
849-
self.gguf_writer.add_name(self.dir_model.name)
851+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
850852
self.gguf_writer.add_source_hf_repo(hf_repo)
851853
self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
852854
self.gguf_writer.add_context_length(ctx_length)
@@ -883,7 +885,7 @@ def set_gguf_parameters(self):
883885
else:
884886
raise ValueError("gguf: can not find ctx length parameter.")
885887

886-
self.gguf_writer.add_name(self.dir_model.name)
888+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
887889
self.gguf_writer.add_source_hf_repo(hf_repo)
888890
self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
889891
self.gguf_writer.add_context_length(ctx_length)
@@ -1006,7 +1008,7 @@ def set_gguf_parameters(self):
10061008
else:
10071009
raise ValueError("gguf: can not find ctx length parameter.")
10081010

1009-
self.gguf_writer.add_name(self.dir_model.name)
1011+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
10101012
self.gguf_writer.add_source_hf_repo(hf_repo)
10111013
self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
10121014
self.gguf_writer.add_context_length(ctx_length)
@@ -1202,7 +1204,7 @@ def set_gguf_parameters(self):
12021204
hparams = self.hparams
12031205
block_count = hparams["num_hidden_layers"]
12041206

1205-
self.gguf_writer.add_name(self.dir_model.name)
1207+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
12061208
self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
12071209
self.gguf_writer.add_embedding_length(hparams["hidden_size"])
12081210
self.gguf_writer.add_block_count(block_count)
@@ -1677,7 +1679,7 @@ class GPT2Model(Model):
16771679
model_arch = gguf.MODEL_ARCH.GPT2
16781680

16791681
def set_gguf_parameters(self):
1680-
self.gguf_writer.add_name(self.dir_model.name)
1682+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
16811683
self.gguf_writer.add_block_count(self.hparams["n_layer"])
16821684
self.gguf_writer.add_context_length(self.hparams["n_ctx"])
16831685
self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
@@ -2244,7 +2246,7 @@ def set_gguf_parameters(self):
22442246
hparams = self.hparams
22452247
block_count = hparams["num_hidden_layers"]
22462248

2247-
self.gguf_writer.add_name(self.dir_model.name)
2249+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
22482250
self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
22492251
self.gguf_writer.add_embedding_length(hparams["hidden_size"])
22502252
self.gguf_writer.add_block_count(block_count)
@@ -2344,7 +2346,7 @@ def set_gguf_parameters(self):
23442346
# Fail early for models which don't have a block expansion factor of 2
23452347
assert d_inner == 2 * d_model
23462348

2347-
self.gguf_writer.add_name(self.dir_model.name)
2349+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
23482350
self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default
23492351
self.gguf_writer.add_embedding_length(d_model)
23502352
self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
@@ -2846,7 +2848,7 @@ def main() -> None:
28462848
logger.error(f"Model {hparams['architectures'][0]} is not supported")
28472849
sys.exit(1)
28482850

2849-
model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy)
2851+
model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy, args.model_name)
28502852

28512853
logger.info("Set model parameters")
28522854
model_instance.set_gguf_parameters()

0 commit comments

Comments (0)