+            assert len(tokens) == hp.n_vocab, f'Override vocab has a different number of items than hyperparameters - override = {len(tokens)} but n_vocab={hp.n_vocab}'
+            gguf_writer.add_token_list(tokens)
+            gguf_writer.add_token_scores(scores)
+            if len(toktypes) > 0:
+                gguf_writer.add_token_types(toktypes)
+            return
+        print(f'* Adding {hp.n_vocab} vocab item(s)')
         for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items):
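The hunk above handles the vocab-override path: when an override vocabulary is supplied, its tokens, scores, and token types are written through the GGUF writer and the method returns early; otherwise the vocab embedded in the GGML file is used. As a minimal sketch of that flow, the helper below assumes an `override_tokens` iterable of (text, score, toktype) triples — a hypothetical stand-in for whatever loader produces the override vocab. Only the `hp.n_vocab` check and the `gguf_writer.add_token_*` calls come from the diff itself.

# Sketch only, not part of the change: `override_tokens` is a hypothetical
# iterable of (text, score, toktype) triples; gguf_writer and hp are the same
# objects used in the diff above and are simply passed in.
from typing import Iterable, Tuple

def write_override_vocab(gguf_writer, hp, override_tokens: Iterable[Tuple[bytes, float, int]]) -> None:
    tokens, scores, toktypes = [], [], []
    for text, score, toktype in override_tokens:
        tokens.append(text)
        scores.append(score)
        toktypes.append(toktype)
    # Same consistency check as in the diff: the override vocab must match
    # the n_vocab recorded in the GGML hyperparameters.
    assert len(tokens) == hp.n_vocab, (
        f'Override vocab has a different number of items than hyperparameters'
        f' - override = {len(tokens)} but n_vocab={hp.n_vocab}')
    gguf_writer.add_token_list(tokens)
    gguf_writer.add_token_scores(scores)
    if len(toktypes) > 0:
        gguf_writer.add_token_types(toktypes)

The second hunk, below, extends the argument parser with the new metadata-related flags and shortens the warning printed by main().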
     parser.add_argument('--eps', default='5.0e-06', help='RMS norm eps: Use 1e-6 for LLaMA1 and OpenLLaMA, use 1e-5 for LLaMA2')
     parser.add_argument('--context-length', '-c', type=int, default=2048, help='Default max context length: LLaMA1 is typically 2048, LLaMA2 is typically 4096')
+    parser.add_argument('--model-metadata-dir', '-m', type=Path, help='Load HuggingFace/.pth vocab and metadata from the specified directory')
+    parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir")
+    parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm)", default="spm")
     return parser.parse_args()

 def main():
     cfg = handle_args()
     print(f'* Using config: {cfg}')
-    print('\n=== WARNING === Be aware that this conversion script is best-effort. Special tokens may not be converted correctly. Use a native GGUF model if possible. === WARNING ===\n')
+    print('\n=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===\n')
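To illustrate how the new flags combine, here is a hypothetical invocation. The script name and its pre-existing input/output arguments are assumptions for the example; only --eps, --context-length, --model-metadata-dir, --vocab-dir, and --vocabtype appear in the diff above. The --eps 1e-5 and --context-length 4096 values follow the LLaMA2 guidance in the help strings, and --vocab-dir is only meaningful together with --model-metadata-dir per its help text.

# Hypothetical usage; the script name and the -i/-o arguments are assumed,
# not taken from this diff.
python convert-llama-ggmlv3-to-gguf.py -i llama2-7b.ggmlv3.q4_0.bin -o llama2-7b.q4_0.gguf \
    --eps 1e-5 --context-length 4096 \
    --model-metadata-dir ./llama-2-7b-hf --vocab-dir ./llama-2-7b-hf --vocabtype spm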