
Commit 38341e2

Allow overriding vocab and hyperparams from original model metadata
1 parent 666b5a6 commit 38341e2

File tree

1 file changed: +79 -14 lines changed

convert-llama-ggmlv3-to-gguf.py

Lines changed: 79 additions & 14 deletions
@@ -125,25 +125,30 @@ def load(self, data, offset):
         return offset

 class GGMLToGGUF:
-    def __init__(self, ggml_model, data, cfg):
+    def __init__(self, ggml_model, data, cfg, params_override = None, vocab_override = None):
         hp = ggml_model.hyperparameters
         self.model = ggml_model
         self.data = data
         self.cfg = cfg
+        self.params_override = params_override
+        self.vocab_override = vocab_override
         ff_tensor_idx = ggml_model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
         assert ff_tensor_idx is not None, 'Missing layer 0 FF tensor'
         ff_tensor = ggml_model.tensors[ff_tensor_idx]
         self.ff_length = ff_tensor.dims[1]
-        if cfg.gqa == 1:
-            n_kv_head = hp.n_head
+        if params_override is not None:
+            n_kv_head = params_override.n_head_kv
         else:
-            gqa = float(cfg.gqa)
-            n_kv_head = None
-            for x in range(1, 256):
-                if float(hp.n_head) / float(x) == gqa:
-                    n_kv_head = x
-            assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
-            print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
+            if cfg.gqa == 1:
+                n_kv_head = hp.n_head
+            else:
+                gqa = float(cfg.gqa)
+                n_kv_head = None
+                for x in range(1, 256):
+                    if float(hp.n_head) / float(x) == gqa:
+                        n_kv_head = x
+                assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
+                print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
         self.n_kv_head = n_kv_head
         self.name_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, ggml_model.hyperparameters.n_layer)

@@ -174,6 +179,20 @@ def add_params(self, gguf_writer):
         if name is not None:
             gguf_writer.add_name(name)
         gguf_writer.add_description(desc)
+        if self.params_override is not None:
+            po = self.params_override
+            assert po.n_embd == hp.n_embd, 'Model hyperparams mismatch'
+            assert po.n_layer == hp.n_layer, 'Model hyperparams mismatch'
+            assert po.n_head == hp.n_head, 'Model hyperparams mismatch'
+            gguf_writer.add_context_length      (po.n_ctx)
+            gguf_writer.add_embedding_length    (po.n_embd)
+            gguf_writer.add_block_count         (po.n_layer)
+            gguf_writer.add_feed_forward_length (po.n_ff)
+            gguf_writer.add_rope_dimension_count(po.n_embd // po.n_head)
+            gguf_writer.add_head_count          (po.n_head)
+            gguf_writer.add_head_count_kv       (po.n_head_kv)
+            gguf_writer.add_layer_norm_rms_eps  (po.f_norm_eps)
+            return
         gguf_writer.add_context_length(cfg.context_length)
         gguf_writer.add_embedding_length(hp.n_embd)
         gguf_writer.add_block_count(hp.n_layer)
@@ -182,14 +201,32 @@ def add_params(self, gguf_writer):
         gguf_writer.add_head_count(hp.n_head)
         gguf_writer.add_head_count_kv(self.n_kv_head)
         gguf_writer.add_layer_norm_rms_eps(float(cfg.eps))
-        gguf_writer.add_tokenizer_model('llama')

     def add_vocab(self, gguf_writer):
         hp = self.model.hyperparameters
+        gguf_writer.add_tokenizer_model('llama')
         tokens = []
         scores = []
-        print(f'* Adding {hp.n_vocab} vocab item(s)')
         toktypes = []
+        if self.vocab_override is not None:
+            vo = self.vocab_override
+            print('* Adding vocab item(s)')
+            for (idx, vitem) in enumerate(vo.all_tokens()):
+                if len(vitem) == 3:
+                    tokens.append(vitem[0])
+                    scores.append(vitem[1])
+                    toktypes.append(vitem[2])
+                else:
+                    # Maybe try to guess the token type here?
+                    tokens.append(vitem[0])
+                    scores.append(vitem[1])
+            assert len(tokens) == hp.n_vocab, f'Override vocab has a different number of items than hyperparameters - override = {len(tokens)} but n_vocab={hp.n_vocab}'
+            gguf_writer.add_token_list(tokens)
+            gguf_writer.add_token_scores(scores)
+            if len(toktypes) > 0:
+                gguf_writer.add_token_types(toktypes)
+            return
+        print(f'* Adding {hp.n_vocab} vocab item(s)')
         for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items):
             tt = 1 # Normal
             if len(vbytes) == 0:
@@ -230,6 +267,23 @@ def add_tensors(self, gguf_writer):
             # print(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}')
             gguf_writer.add_tensor(mapped_name, data[tensor.start_offset:tensor.start_offset + tensor.len_bytes], raw_shape = tempdims, raw_dtype = tensor.dtype)

+def handle_metadata(cfg):
+    import convert
+    assert cfg.model_metadata_dir.is_dir(), 'Metadata dir is not a directory'
+    hf_config_path   = cfg.model_metadata_dir / "config.json"
+    orig_config_path = cfg.model_metadata_dir / "params.json"
+    # Passing None to these load functions is not kosher but it should
+    # currently work for HF and only fail for original mode if
+    # n_vocab or n_ff is missing in params.json
+    if hf_config_path.exists():
+        params = convert.Params.loadHFTransformerJson(None, hf_config_path)
+    elif orig_config_path.exists():
+        params = convert.Params.loadOriginalParamsJson(None, orig_config_path)
+    else:
+        raise ValueError('Unable to load metadata')
+    vocab = convert.load_vocab(cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir, cfg.vocabtype)
+    convert.check_vocab_size(params, vocab)
+    return (params, vocab)

 def handle_args():
     parser = argparse.ArgumentParser(description = 'Convert GGMLv3 models to GGUF')
@@ -240,18 +294,29 @@ def handle_args():
     parser.add_argument('--gqa', type = int, default = 1, help = 'grouped-query attention factor (use 8 for LLaMA2 70B)')
     parser.add_argument('--eps', default = '5.0e-06', help = 'RMS norm eps: Use 1e-6 for LLaMA1 and OpenLLaMA, use 1e-5 for LLaMA2')
     parser.add_argument('--context-length', '-c', type=int, default = 2048, help = 'Default max context length: LLaMA1 is typically 2048, LLaMA2 is typically 4096')
+    parser.add_argument('--model-metadata-dir', '-m', type = Path, help ='Load HuggingFace/.pth vocab and metadata from the specified directory')
+    parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir")
+    parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm)", default="spm")
     return parser.parse_args()

 def main():
     cfg = handle_args()
     print(f'* Using config: {cfg}')
-    print('\n=== WARNING === Be aware that this conversion script is best-effort. Special tokens may not be converted correctly. Use a native GGUF model if possible. === WARNING ===\n')
+    print('\n=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===\n')
+    params_override = None
+    vocab_override = None
+    if cfg.model_metadata_dir is not None:
+        (params_override, vocab_override) = handle_metadata(cfg)
+        print(f'* Overriding params: {params_override}')
+        print(f'* Overriding vocab: {vocab_override}')
+    else:
+        print('\n=== WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===\n')
     data = np.memmap(cfg.input, mode = 'r')
     model = GGMLV3Model()
     print('* Scanning GGML input file')
     offset = model.load(data, 0)
     print(f'* GGML model hyperparameters: {model.hyperparameters}')
-    converter = GGMLToGGUF(model, data, cfg)
+    converter = GGMLToGGUF(model, data, cfg, params_override = params_override, vocab_override = vocab_override)
     converter.save()
     print(f'* Successful completion. Output saved to: {cfg.output}')

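For reference, a minimal sketch (not part of the commit) of the new metadata-override path. It assumes the snippet runs inside convert-llama-ggmlv3-to-gguf.py itself, where handle_metadata() is defined, and that convert.py from the repository is importable; the model directory below is a placeholder and must contain config.json (HuggingFace) or params.json (original .pth) plus tokenizer.model.

from pathlib import Path
from types import SimpleNamespace

# Hypothetical stand-in for the argparse namespace returned by handle_args().
cfg = SimpleNamespace(
    model_metadata_dir = Path('/models/llama-2-7b'),  # placeholder: dir with config.json or params.json
    vocab_dir          = None,                        # None: look for tokenizer.model in model_metadata_dir
    vocabtype          = 'spm',                       # or 'bpe', mirroring the new --vocabtype choices
)

# Loads hyperparameters and vocab from the original model metadata and checks
# that the vocab size matches (convert.check_vocab_size).
params_override, vocab_override = handle_metadata(cfg)

# main() then forwards both overrides, so the GGUF metadata comes from the
# original model files instead of being guessed from --gqa, --eps and
# --context-length:
#   converter = GGMLToGGUF(model, data, cfg,
#                          params_override = params_override,
#                          vocab_override  = vocab_override)

From the command line, the same path is enabled by passing --model-metadata-dir (optionally with --vocab-dir and --vocabtype) alongside the script's existing input and output arguments.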