Commit 5fa1a08

refactor: Update OutputFile class for enhanced model vocabulary management
- Restructured the constructor for improved readability.
- Updated the `add_meta_arch` method for flexible model name determination.
- Introduced `handle_tokenizer_model` to map vocab types to supported tokenizer models.
- Streamlined vocabulary extraction with `extract_vocabulary_from_model`.
- Simplified vocabulary metadata addition in `add_meta_vocab`.
- Refactored `add_tensor_info` for clarity and consistency.
- Improved error handling for clearer user feedback.

Together, these changes make `OutputFile` a single, coherent place to manage model conversion output: metadata, vocabulary, and tensor information.
1 parent 7e4a4eb commit 5fa1a08
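
The commit message above describes a pure refactor, and the new vocabulary flow is easiest to see end to end. Below is a minimal, self-contained sketch of that flow; the vocab class is a hypothetical stand-in, not one of convert.py's real Vocab types:

class FakeBpeVocab:
    """Hypothetical stand-in exposing the all_tokens() shape convert.py expects."""
    def all_tokens(self):
        yield from [("hello", 0.0, 1), ("world", -1.0, 1)]

def add_meta_vocab_flow(vocab) -> None:
    # Step 1: resolve the tokenizer model name (handle_tokenizer_model).
    tokenizer_model = {FakeBpeVocab: "gpt2"}.get(type(vocab))
    if tokenizer_model is None:
        raise ValueError("Unknown vocab type: Not supported")
    # Step 2: split the vocab into parallel lists (extract_vocabulary_from_model).
    tokens, scores, toktypes = (list(t) for t in zip(*vocab.all_tokens()))
    # Step 3: hand everything to the writer (self.gguf.add_* in the real class).
    print(tokenizer_model, tokens, scores, toktypes)

add_meta_vocab_flow(FakeBpeVocab())  # gpt2 ['hello', 'world'] [0.0, -1.0] [1, 1]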


convert.py

Lines changed: 84 additions & 30 deletions
@@ -1019,8 +1019,12 @@ def check_vocab_size(params: Params, vocab: Vocab, pad_vocab: bool = False) -> None:
 
 
 class OutputFile:
-    def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
-        self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
+    def __init__(
+        self, fname_out: Path, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE
+    ) -> None:
+        self.gguf = gguf.GGUFWriter(
+            fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess
+        )
 
     def add_meta_arch(self, params: Params) -> None:
         name = "LLaMA"
@@ -1029,28 +1033,28 @@ def add_meta_arch(self, params: Params) -> None:
         if params.n_ctx == 4096:
             name = "LLaMA v2"
         elif params.path_model is not None:
-            name = str(params.path_model.parent).split('/')[-1]
+            name = str(params.path_model.parent).split("/")[-1]
 
-        self.gguf.add_name                (name)
-        self.gguf.add_context_length      (params.n_ctx)
-        self.gguf.add_embedding_length    (params.n_embd)
-        self.gguf.add_block_count         (params.n_layer)
-        self.gguf.add_feed_forward_length (params.n_ff)
+        self.gguf.add_name(name)
+        self.gguf.add_context_length(params.n_ctx)
+        self.gguf.add_embedding_length(params.n_embd)
+        self.gguf.add_block_count(params.n_layer)
+        self.gguf.add_feed_forward_length(params.n_ff)
         self.gguf.add_rope_dimension_count(params.n_embd // params.n_head)
-        self.gguf.add_head_count          (params.n_head)
-        self.gguf.add_head_count_kv       (params.n_head_kv)
+        self.gguf.add_head_count(params.n_head)
+        self.gguf.add_head_count_kv(params.n_head_kv)
+
+        if params.f_norm_eps is None:
+            raise ValueError("f_norm_eps is None")
+
+        self.gguf.add_layer_norm_rms_eps(params.f_norm_eps)
 
         if params.n_experts:
             self.gguf.add_expert_count(params.n_experts)
 
         if params.n_experts_used:
             self.gguf.add_expert_used_count(params.n_experts_used)
 
-        if params.f_norm_eps:
-            self.gguf.add_layer_norm_rms_eps(params.f_norm_eps)
-        else:
-            raise ValueError('f_norm_eps is None')
-
         if params.f_rope_freq_base is not None:
             self.gguf.add_rope_freq_base(params.f_rope_freq_base)
 
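
Note that the semantics changed here, not just the placement: the old `if params.f_norm_eps:` was a truthiness test, so a legitimate value of 0.0 would also have raised, while the new `is None` check rejects only a missing value, and it now runs before the optional expert-count fields. A self-contained sketch of the difference (this Params is a hypothetical one-field stand-in):

from dataclasses import dataclass
from typing import Optional

@dataclass
class Params:  # hypothetical stand-in with only the field under discussion
    f_norm_eps: Optional[float] = None

def old_check(params: Params) -> None:
    if not params.f_norm_eps:          # truthiness: rejects None AND 0.0
        raise ValueError("f_norm_eps is None")

def new_check(params: Params) -> None:
    if params.f_norm_eps is None:      # rejects only a truly missing value
        raise ValueError("f_norm_eps is None")

new_check(Params(f_norm_eps=0.0))      # accepted after this commit
try:
    old_check(Params(f_norm_eps=0.0))
except ValueError as err:
    print("old behavior:", err)        # old behavior: f_norm_eps is None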

@@ -1068,18 +1072,44 @@ def add_meta_arch(self, params: Params) -> None:
         if params.ftype is not None:
             self.gguf.add_file_type(params.ftype)
 
-    def add_meta_vocab(self, vocab: Vocab) -> None:
+    def handle_tokenizer_model(self, vocab: Vocab) -> str:
+        # Map the vocab types to the supported tokenizer models
+        tokenizer_model = {
+            SentencePieceVocab: "llama",
+            HfVocab: "llama",
+            BpeVocab: "gpt2",
+        }.get(type(vocab))
+
+        # Block if vocab type is not predefined
+        if tokenizer_model is None:
+            raise ValueError("Unknown vocab type: Not supported")
+
+        return tokenizer_model
+
+    def extract_vocabulary_from_model(self, vocab: Vocab) -> Tuple[list, list, list]:
         tokens = []
         scores = []
         toktypes = []
+
         # NOTE: `all_tokens` returns the base vocabulary and added tokens
         for text, score, toktype in vocab.all_tokens():
             tokens.append(text)
             scores.append(score)
             toktypes.append(toktype)
 
-        vocab_type = vocab.get_vocab_type()
-        self.gguf.add_tokenizer_model(vocab_type)
+        return tokens, scores, toktypes
+
+    def add_meta_vocab(self, vocab: Vocab) -> None:
+        # Handle the tokenizer model
+        tokenizer_model = self.handle_tokenizer_model(vocab)
+
+        # Ensure that tokenizer_model is added to the GGUF model
+        self.gguf.add_tokenizer_model(tokenizer_model)
+
+        # Extract model vocabulary for model conversion
+        tokens, scores, toktypes = self.extract_vocabulary_from_model(vocab)
+
+        # Add extracted token information for model conversion
         self.gguf.add_token_list(tokens)
         self.gguf.add_token_scores(scores)
         self.gguf.add_token_types(toktypes)
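
The dispatch above keys a dict on `type(vocab)`, which matches the exact class rather than accepting subclasses the way an `isinstance` chain would. A self-contained replica of the pattern, with empty stand-in classes in place of convert.py's real vocab types:

class SentencePieceVocab: pass
class HfVocab: pass
class BpeVocab: pass

def handle_tokenizer_model(vocab: object) -> str:
    # dict.get on the exact type; unknown vocab classes fall through to None
    tokenizer_model = {
        SentencePieceVocab: "llama",
        HfVocab: "llama",
        BpeVocab: "gpt2",
    }.get(type(vocab))
    if tokenizer_model is None:
        raise ValueError("Unknown vocab type: Not supported")
    return tokenizer_model

print(handle_tokenizer_model(HfVocab()))   # llama
print(handle_tokenizer_model(BpeVocab()))  # gpt2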
@@ -1089,10 +1119,14 @@ def add_meta_special_vocab(self, svocab: gguf.SpecialVocab) -> None:
 
     def add_tensor_info(self, name: str, tensor: LazyTensor) -> None:
         n_elements = int(np.prod(tensor.shape))
-        raw_dtype = getattr(tensor.data_type, 'ggml_type', None)
-        data_type = getattr(tensor.data_type, 'quantized_type', None) or tensor.data_type.dtype
+        raw_dtype = getattr(tensor.data_type, "ggml_type", None)
+        data_type = (
+            getattr(tensor.data_type, "quantized_type", None) or tensor.data_type.dtype
+        )
         data_nbytes = tensor.data_type.elements_to_bytes(n_elements)
-        self.gguf.add_tensor_info(name, tensor.shape, data_type, data_nbytes, raw_dtype = raw_dtype)
+        self.gguf.add_tensor_info(
+            name, tensor.shape, data_type, data_nbytes, raw_dtype=raw_dtype
+        )
 
     def write_meta(self) -> None:
         self.gguf.write_header_to_file()
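
The getattr fallback above is what lets quantized and unquantized tensors share one code path: only quantized data types carry a `quantized_type`, so everything else falls through to `.dtype`. A runnable sketch with hypothetical stand-ins for convert.py's DataType family:

from dataclasses import dataclass

@dataclass
class UnquantizedDataType:  # stand-in; the real classes live in convert.py
    dtype: str

@dataclass
class QuantizedDataType(UnquantizedDataType):
    quantized_type: str = ""

for dt in (UnquantizedDataType("float32"),
           QuantizedDataType("float32", "Q8_0")):
    # Same fallback as add_tensor_info: prefer the quantized type when present.
    data_type = getattr(dt, "quantized_type", None) or dt.dtype
    print(data_type)  # float32, then Q8_0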
@@ -1106,11 +1140,14 @@ def close(self) -> None:
 
     @staticmethod
     def write_vocab_only(
-        fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab,
+        fname_out: Path,
+        params: Params,
+        vocab: Vocab,
+        svocab: gguf.SpecialVocab,
         endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
         pad_vocab: bool = False,
     ) -> None:
-        check_vocab_size(params, vocab, pad_vocab = pad_vocab)
+        check_vocab_size(params, vocab, pad_vocab=pad_vocab)
 
         of = OutputFile(fname_out, endianess=endianess)
 
@@ -1138,12 +1175,17 @@ def maybe_do_quantize(item: tuple[DataType, NDArray]) -> NDArray:
 
     @staticmethod
     def write_all(
-        fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab,
+        fname_out: Path,
+        ftype: GGMLFileType,
+        params: Params,
+        model: LazyModel,
+        vocab: Vocab,
+        svocab: gguf.SpecialVocab,
         concurrency: int = DEFAULT_CONCURRENCY,
         endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
         pad_vocab: bool = False,
     ) -> None:
-        check_vocab_size(params, vocab, pad_vocab = pad_vocab)
+        check_vocab_size(params, vocab, pad_vocab=pad_vocab)
 
         of = OutputFile(fname_out, endianess=endianess)
 
@@ -1160,18 +1202,30 @@ def write_all(
         of.write_tensor_info()
 
         # tensor data
-        ndarrays_inner = bounded_parallel_map(OutputFile.do_item, model.items(), concurrency = concurrency)
+        ndarrays_inner = bounded_parallel_map(
+            OutputFile.do_item, model.items(), concurrency=concurrency
+        )
         if ftype == GGMLFileType.MostlyQ8_0:
-            ndarrays = bounded_parallel_map(OutputFile.maybe_do_quantize, ndarrays_inner, concurrency = concurrency, max_workers = concurrency, use_processpool_executor = True)
+            ndarrays = bounded_parallel_map(
+                OutputFile.maybe_do_quantize,
+                ndarrays_inner,
+                concurrency=concurrency,
+                max_workers=concurrency,
+                use_processpool_executor=True,
+            )
         else:
             ndarrays = map(OutputFile.maybe_do_quantize, ndarrays_inner)
 
         start = time.time()
-        for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)):
+        for i, ((name, lazy_tensor), ndarray) in enumerate(
+            zip(model.items(), ndarrays)
+        ):
             elapsed = time.time() - start
-            size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
+            size = " x ".join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
-            print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}")
+            print(
+                f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}"
+            )
             of.gguf.write_tensor_data(ndarray)
 
         of.close()
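
bounded_parallel_map is convert.py's own helper; the call above shows its knobs (concurrency, max_workers, use_processpool_executor) but not the idea behind it. As a rough, self-contained illustration of that idea, not the real implementation: map a function over a stream while keeping at most `concurrency` tasks in flight, yielding results in submission order, which is what lets write_all zip them back against model.items():

import concurrent.futures
from typing import Callable, Iterable, Iterator, TypeVar

In = TypeVar("In")
Out = TypeVar("Out")

def bounded_parallel_map_sketch(
    func: Callable[[In], Out], iterable: Iterable[In], concurrency: int
) -> Iterator[Out]:
    """Yield func(x) for each x, with at most `concurrency` tasks in flight."""
    items = iter(iterable)
    with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as pool:
        # Prime the pool with up to `concurrency` tasks.
        futures = [pool.submit(func, x) for _, x in zip(range(concurrency), items)]
        while futures:
            result = futures.pop(0).result()   # oldest first: preserves order
            for x in items:                    # refill one slot if input remains
                futures.append(pool.submit(func, x))
                break
            yield result

print(list(bounded_parallel_map_sketch(lambda n: n * n, range(5), concurrency=2)))
# [0, 1, 4, 9, 16]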
