@@ -69,6 +69,9 @@ class UnquantizedDataType:
69
69
'I32' : DT_I32 ,
70
70
}
71
71
72
+ # TODO: match this with `llama_ftype`
73
+ # TODO: rename to LLAMAFileType
74
+ # TODO: move to `gguf.py`
72
75
class GGMLFileType (enum .Enum ):
73
76
AllF32 = 0
74
77
MostlyF16 = 1 # except 1d tensors
@@ -101,6 +104,8 @@ class Params:
101
104
n_head_kv : int
102
105
f_norm_eps : float
103
106
107
+ ftype : Optional [GGMLFileType ] = None
108
+
104
109
@staticmethod
105
110
def find_n_mult (n_ff : int , n_embd : int ) -> int :
106
111
# hardcoded magic range
@@ -738,6 +743,9 @@ def add_meta_arch(self, params: Params) -> None:
738
743
self .gguf .add_head_count_kv (params .n_head_kv )
739
744
self .gguf .add_layer_norm_rms_eps (params .f_norm_eps )
740
745
746
+ if params .ftype :
747
+ self .gguf .add_file_type (params .ftype )
748
+
741
749
def add_meta_vocab (self , vocab : Vocab ) -> None :
742
750
tokens = []
743
751
scores = []
@@ -1020,6 +1028,12 @@ def main(args_in: Optional[List[str]] = None) -> None:
1020
1028
" - LLaMA v2: --ctx 4096\n " )
1021
1029
params .n_ctx = args .ctx
1022
1030
1031
+ if args .outtype :
1032
+ params .ftype = {
1033
+ "f32" : GGMLFileType .AllF32 ,
1034
+ "f16" : GGMLFileType .MostlyF16 ,
1035
+ }[args .outtype ]
1036
+
1023
1037
print (f"params = { params } " )
1024
1038
1025
1039
vocab : Vocab
@@ -1040,11 +1054,14 @@ def main(args_in: Optional[List[str]] = None) -> None:
1040
1054
vocab_dir = args .vocab_dir if args .vocab_dir else model_plus .paths [0 ].parent
1041
1055
vocab = load_vocab (vocab_dir , args .vocabtype )
1042
1056
1043
- model = model_plus .model
1044
- model = convert_model_names (model , params )
1045
- output_type = pick_output_type (model , args .outtype )
1046
- model = convert_to_output_type (model , output_type )
1047
- outfile = args .outfile or default_outfile (model_plus .paths , output_type )
1057
+ model = model_plus .model
1058
+ model = convert_model_names (model , params )
1059
+ ftype = pick_output_type (model , args .outtype )
1060
+ model = convert_to_output_type (model , ftype )
1061
+ outfile = args .outfile or default_outfile (model_plus .paths , ftype )
1062
+
1063
+ params .ftype = ftype
1064
+ print (f"Writing { outfile } , format { ftype } " )
1048
1065
1049
1066
OutputFile .write_all (outfile , params , model , vocab )
1050
1067
print (f"Wrote { outfile } " )
0 commit comments