@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+import gguf
 import argparse
 import concurrent.futures
 import copy
@@ -118,6 +119,7 @@ class Params:
     n_mult:    int
     n_head:    int
     n_layer:   int
+    n_ctx:     int
     n_kv_head: Optional[int]  # This parameter is only used for Llama 2
 
     @staticmethod
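
Note: with this hunk applied, Params carries the training context alongside the other hyperparameters. Assembled from the fields visible elsewhere in this diff (a sketch, not the full class), the record now reads roughly:

@dataclass
class Params:
    n_vocab:   int
    n_embd:    int
    n_mult:    int
    n_head:    int
    n_layer:   int
    n_ctx:     int
    n_kv_head: Optional[int]  # This parameter is only used for Llama 2
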
@@ -145,6 +147,7 @@ def guessed(model: 'LazyModel') -> 'Params':
             n_mult   = 256,
             n_head   = n_head,
             n_layer  = n_layer,
+            n_ctx    = -1,
             n_kv_head = None,
         )
 
@@ -161,12 +164,21 @@ def loadHFTransformerJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
 
         n_mult = find_n_mult(n_ff, n_embd);
 
+        if "max_sequence_length" in hparams:
+            n_ctx = hparams["max_sequence_length"]
+        elif "max_position_embeddings" in hparams:
+            n_ctx = hparams["max_position_embeddings"]
+        else:
+            raise Exception("failed to guess 'n_ctx'. This model is unknown or unsupported.\n"
+                            "Suggestion: provide 'config.json' of the model in the same directory containing model files.")
+
         return Params(
             n_vocab = n_vocab,
             n_embd  = n_embd,
             n_mult  = n_mult,
             n_head  = n_head,
             n_layer = n_layer,
+            n_ctx   = n_ctx,
             n_kv_head = n_kv_head,
         )
 
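
The key precedence above (prefer "max_sequence_length", fall back to "max_position_embeddings") can be exercised in isolation. A minimal sketch of the same lookup against a bare config.json; the load_ctx_len helper name is illustrative, not part of this patch:

import json
from pathlib import Path

def load_ctx_len(model_dir: Path) -> int:
    # Mirror the precedence used in loadHFTransformerJson: original LLaMA
    # checkpoints tend to ship "max_sequence_length", HF-style configs
    # "max_position_embeddings".
    hparams = json.loads((model_dir / "config.json").read_text())
    if "max_sequence_length" in hparams:
        return hparams["max_sequence_length"]
    if "max_position_embeddings" in hparams:
        return hparams["max_position_embeddings"]
    raise Exception("failed to guess 'n_ctx'; place the model's config.json "
                    "next to the model files")
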
@@ -191,6 +203,7 @@ def loadOriginalParamsJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
             n_mult  = n_mult,
             n_head  = n_head,
             n_layer = n_layer,
+            n_ctx   = -1,
             n_kv_head = None,
         )
 
@@ -206,7 +219,6 @@ def load(model_plus: 'ModelPlus') -> 'Params':
         else:
             params = Params.guessed(model_plus.model)
 
-        print(f'params: n_vocab:{params.n_vocab} n_embd:{params.n_embd} n_mult:{params.n_mult} n_head:{params.n_head} n_layer:{params.n_layer}')
         return params
 
 
@@ -715,21 +727,14 @@ def check_vocab_size(params: Params, vocab: Vocab) -> None:
 
 class OutputFile:
     def __init__(self, fname_out: Path) -> None:
-        self.fout = open(fname_out, "wb")
+        self.gguf = gguf.GGUFWriter.open(fname_out)
 
     def write_file_header(self, params: Params, file_type: GGMLFileType) -> None:
-        self.fout.write(b"ggjt"[::-1])  # magic
-        values = [
-            1,  # file version
-            params.n_vocab,
-            params.n_embd,
-            params.n_mult,
-            params.n_head,
-            params.n_layer,
-            params.n_embd // params.n_head,  # rot (obsolete)
-            file_type.value,
-        ]
-        self.fout.write(struct.pack("i" * len(values), *values))
+        llm_arch = "llama"
+
+        self.gguf.add_architecture(llm_arch)
+        self.gguf.add_context_length(llm_arch, params.n_ctx)
+        self.gguf.add_embedding_length(llm_arch, params.n_embd)
 
     def write_tensor_header(self, name: str, shape: Sequence[int], data_type: DataType) -> None:
         sname = name.encode('utf-8')
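
Only a thin slice of the gguf writer API is used here. Assuming the calls behave as this diff applies them (GGUFWriter.open plus the per-architecture add_* setters), the new header path boils down to typed key/value metadata in place of the fixed-order packed ints of the old "ggjt" header; the values below are illustrative:

import gguf

writer = gguf.GGUFWriter.open("model.gguf")   # output path illustrative
writer.add_architecture("llama")
writer.add_context_length("llama", 2048)      # params.n_ctx
writer.add_embedding_length("llama", 4096)    # params.n_embd
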
@@ -873,7 +878,6 @@ def filter_and_sort_tensors(model: LazyModel) -> LazyModel:
 
 
 def load_vocab(path: Path, vocabtype: Optional[str]) -> Union[BpeVocab, SentencePieceVocab]:
-    print(f"vocabtype: {vocabtype}")
     # Be extra-friendly and accept either a file or a directory. Also, if it's
     # a directory, it might be the model directory, and tokenizer.model might
     # be in the parent of that.
@@ -893,7 +897,7 @@ def load_vocab(path: Path, vocabtype: Optional[str]) -> Union[BpeVocab, Sentence
             f"Could not find tokenizer.model in {path} or its parent; "
             "if it's in another directory, pass the directory as --vocab-dir")
     added_tokens_path = path.parent / "added_tokens.json"
-    print(f"Loading vocab file {path}")
+    print(f"Loading vocab file '{path}', type '{vocabtype}'")
     if vocabtype == "bpe":
         return BpeVocab(path, added_tokens_path if added_tokens_path.exists() else None)
     elif vocabtype == "spm":
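
With the message above and the new --vocabtype default below, a direct call now reports both the path and the format; e.g. (path illustrative):

from pathlib import Path

# Accepts the model directory or the tokenizer file itself; "spm" selects
# the SentencePiece loader, "bpe" the BPE loader.
vocab = load_vocab(Path("models/7B"), "spm")
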
@@ -933,21 +937,34 @@ def main(args_in: Optional[List[str]] = None) -> None:
     parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
     parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
     parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
-    parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format (default: spm)")
+    parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format (default: spm)", default="spm")
+    parser.add_argument("--ctx", type=int, help="model training context (default: based on input)")
     args = parser.parse_args(args_in)
 
     vocab: Vocab
     if args.dump_single:
         model_plus = lazy_load_file(args.model)
         do_dump_model(model_plus)
-    elif args.vocab_only:
+
+    model_plus = load_some_model(args.model)
+    params = Params.load(model_plus)
+    if params.n_ctx == -1:
+        if args.ctx is None:
+            raise Exception("The model doesn't have a context size, and you didn't specify one with --ctx\n"
+                            "Please specify one with --ctx:\n"
+                            " - LLaMA v1: --ctx 2048\n"
+                            " - LLaMA v2: --ctx 4096\n")
+        params.n_ctx = args.ctx
+
+    print(f"params = {params}")
+
+    if args.vocab_only:
         vocab = load_vocab(args.vocab_dir or args.model, args.vocabtype)
         assert args.outfile, "need --outfile if using --vocab-only"
         outfile = args.outfile
         OutputFile.write_vocab_only(outfile, vocab)
         print(f"Wrote {outfile}")
     else:
-        model_plus = load_some_model(args.model)
         if args.dump:
             do_dump_model(model_plus)
             return
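
Because main() takes an explicit argument list, the new --ctx fallback is easy to exercise; for a LLaMA v1 checkpoint without a config.json (paths illustrative):

# Without --ctx this raises the "doesn't have a context size" error above.
main(["models/7B", "--ctx", "2048", "--outfile", "models/7B/model.gguf"])
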
@@ -957,7 +974,6 @@ def main(args_in: Optional[List[str]] = None) -> None:
         vocab_dir = args.vocab_dir if args.vocab_dir else model_plus.paths[0].parent
         vocab = load_vocab(vocab_dir, args.vocabtype)
 
-        params = Params.load(model_plus)
         model = model_plus.model
         model = do_necessary_conversions(model, params)
         output_type = pick_output_type(model, args.outtype)