@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+import gguf
 import argparse
 import concurrent.futures
 import copy
@@ -118,6 +119,7 @@ class Params:
     n_mult:    int
     n_head:    int
     n_layer:   int
+    n_ctx:     int
     n_kv_head: Optional[int]  # This parameter is only used for Llama 2
 
     @staticmethod
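
Note: with this hunk applied, Params carries the training context alongside the other hyperparameters. Assembled from the fields visible elsewhere in this diff (a sketch, not the full class), the record now reads roughly:

@dataclass
class Params:
    n_vocab:   int
    n_embd:    int
    n_mult:    int
    n_head:    int
    n_layer:   int
    n_ctx:     int
    n_kv_head: Optional[int]  # This parameter is only used for Llama 2
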
@@ -145,6 +147,7 @@ def guessed(model: 'LazyModel') -> 'Params':
             n_mult   = 256,
             n_head   = n_head,
             n_layer  = n_layer,
+            n_ctx    = -1,
             n_kv_head = None,
         )
 
@@ -161,12 +164,21 @@ def loadHFTransformerJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
 
         n_mult = find_n_mult(n_ff, n_embd);
 
+        if "max_sequence_length" in hparams:
+            n_ctx = hparams["max_sequence_length"]
+        elif "max_position_embeddings" in hparams:
+            n_ctx = hparams["max_position_embeddings"]
+        else:
+            raise Exception("failed to guess 'n_ctx'. This model is unknown or unsupported.\n"
+                            "Suggestion: provide 'config.json' of the model in the same directory containing model files.")
+
         return Params(
             n_vocab = n_vocab,
             n_embd  = n_embd,
             n_mult  = n_mult,
             n_head  = n_head,
             n_layer = n_layer,
+            n_ctx   = n_ctx,
             n_kv_head = n_kv_head,
         )
 
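
The key precedence above (prefer "max_sequence_length", fall back to "max_position_embeddings") can be exercised in isolation. A minimal sketch of the same lookup against a bare config.json; the load_ctx_len helper name is illustrative, not part of this patch:

import json
from pathlib import Path

def load_ctx_len(model_dir: Path) -> int:
    # Mirror the precedence used in loadHFTransformerJson: original LLaMA
    # checkpoints tend to ship "max_sequence_length", HF-style configs
    # "max_position_embeddings".
    hparams = json.loads((model_dir / "config.json").read_text())
    if "max_sequence_length" in hparams:
        return hparams["max_sequence_length"]
    if "max_position_embeddings" in hparams:
        return hparams["max_position_embeddings"]
    raise Exception("failed to guess 'n_ctx'; place the model's config.json "
                    "next to the model files")
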
@@ -191,6 +203,7 @@ def loadOriginalParamsJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
             n_mult  = n_mult,
             n_head  = n_head,
             n_layer = n_layer,
+            n_ctx   = -1,
             n_kv_head = None,
         )
 
@@ -206,7 +219,6 @@ def load(model_plus: 'ModelPlus') -> 'Params':
         else:
             params = Params.guessed(model_plus.model)
 
-        print(f'params: n_vocab:{params.n_vocab} n_embd:{params.n_embd} n_mult:{params.n_mult} n_head:{params.n_head} n_layer:{params.n_layer}')
         return params
 
 
@@ -715,21 +727,14 @@ def check_vocab_size(params: Params, vocab: Vocab) -> None:
 
 class OutputFile:
     def __init__(self, fname_out: Path) -> None:
-        self.fout = open(fname_out, "wb")
+        self.gguf = gguf.GGUFWriter.open(fname_out)
 
     def write_file_header(self, params: Params, file_type: GGMLFileType) -> None:
-        self.fout.write(b"ggjt"[::-1])  # magic
-        values = [
-            1,  # file version
-            params.n_vocab,
-            params.n_embd,
-            params.n_mult,
-            params.n_head,
-            params.n_layer,
-            params.n_embd // params.n_head,  # rot (obsolete)
-            file_type.value,
-        ]
-        self.fout.write(struct.pack("i" * len(values), *values))
+        llm_arch = "llama"
+
+        self.gguf.add_architecture(llm_arch)
+        self.gguf.add_context_length(llm_arch, params.n_ctx)
+        self.gguf.add_embedding_length(llm_arch, params.n_embd)
 
     def write_tensor_header(self, name: str, shape: Sequence[int], data_type: DataType) -> None:
         sname = name.encode('utf-8')
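
Only a thin slice of the gguf writer API is used here. Assuming the calls behave as this diff applies them (GGUFWriter.open plus the per-architecture add_* setters), the new header path boils down to typed key/value metadata in place of the fixed-order packed ints of the old "ggjt" header; the values below are illustrative:

import gguf

writer = gguf.GGUFWriter.open("model.gguf")   # output path illustrative
writer.add_architecture("llama")
writer.add_context_length("llama", 2048)      # params.n_ctx
writer.add_embedding_length("llama", 4096)    # params.n_embd
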
@@ -873,7 +878,6 @@ def filter_and_sort_tensors(model: LazyModel) -> LazyModel:
 
 
 def load_vocab(path: Path, vocabtype: Optional[str]) -> Union[BpeVocab, SentencePieceVocab]:
-    print(f"vocabtype: {vocabtype}")
     # Be extra-friendly and accept either a file or a directory. Also, if it's
     # a directory, it might be the model directory, and tokenizer.model might
     # be in the parent of that.
@@ -893,7 +897,7 @@ def load_vocab(path: Path, vocabtype: Optional[str]) -> Union[BpeVocab, Sentence
             f"Could not find tokenizer.model in {path} or its parent; "
             "if it's in another directory, pass the directory as --vocab-dir")
     added_tokens_path = path.parent / "added_tokens.json"
-    print(f"Loading vocab file {path}")
+    print(f"Loading vocab file '{path}', type '{vocabtype}'")
     if vocabtype == "bpe":
         return BpeVocab(path, added_tokens_path if added_tokens_path.exists() else None)
     elif vocabtype == "spm":
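
With the message above and the new --vocabtype default below, a direct call now reports both the path and the format; e.g. (path illustrative):

from pathlib import Path

# Accepts the model directory or the tokenizer file itself; "spm" selects
# the SentencePiece loader, "bpe" the BPE loader.
vocab = load_vocab(Path("models/7B"), "spm")
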
@@ -933,21 +937,34 @@ def main(args_in: Optional[List[str]] = None) -> None:
     parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
     parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
     parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
-    parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format (default: spm)")
+    parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format (default: spm)", default="spm")
+    parser.add_argument("--ctx", type=int, help="model training context (default: based on input)")
     args = parser.parse_args(args_in)
 
     vocab: Vocab
     if args.dump_single:
         model_plus = lazy_load_file(args.model)
         do_dump_model(model_plus)
-    elif args.vocab_only:
+
+    model_plus = load_some_model(args.model)
+    params = Params.load(model_plus)
+    if params.n_ctx == -1:
+        if args.ctx is None:
+            raise Exception("The model doesn't have a context size, and you didn't specify one with --ctx\n"
+                            "Please specify one with --ctx:\n"
+                            " - LLaMA v1: --ctx 2048\n"
+                            " - LLaMA v2: --ctx 4096\n")
+        params.n_ctx = args.ctx
+
+    print(f"params = {params}")
+
+    if args.vocab_only:
         vocab = load_vocab(args.vocab_dir or args.model, args.vocabtype)
         assert args.outfile, "need --outfile if using --vocab-only"
         outfile = args.outfile
         OutputFile.write_vocab_only(outfile, vocab)
         print(f"Wrote {outfile}")
     else:
-        model_plus = load_some_model(args.model)
         if args.dump:
             do_dump_model(model_plus)
             return
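
Because main() takes an explicit argument list, the new --ctx fallback is easy to exercise; for a LLaMA v1 checkpoint without a config.json (paths illustrative):

# Without --ctx this raises the "doesn't have a context size" error above.
main(["models/7B", "--ctx", "2048", "--outfile", "models/7B/model.gguf"])
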
@@ -957,7 +974,6 @@ def main(args_in: Optional[List[str]] = None) -> None:
         vocab_dir = args.vocab_dir if args.vocab_dir else model_plus.paths[0].parent
         vocab = load_vocab(vocab_dir, args.vocabtype)
 
-        params = Params.load(model_plus)
         model = model_plus.model
         model = do_necessary_conversions(model, params)
         output_type = pick_output_type(model, args.outtype)