@@ -25,27 +25,36 @@ def main():
25
25
quantize_script_binary = "quantize"
26
26
27
27
parser = argparse .ArgumentParser (
28
- prog = ' python3 quantize.py' ,
29
- description = ' This script quantizes the given models by applying the '
30
- f'"{ quantize_script_binary } " script on them.'
28
+ prog = " python3 quantize.py" ,
29
+ description = " This script quantizes the given models by applying the "
30
+ f'"{ quantize_script_binary } " script on them.' ,
31
31
)
32
32
parser .add_argument (
33
- 'models' , nargs = '+' , choices = ('7B' , '13B' , '30B' , '65B' ),
34
- help = 'The models to quantize.'
33
+ "models" ,
34
+ nargs = "+" ,
35
+ choices = ("7B" , "13B" , "30B" , "65B" ),
36
+ help = "The models to quantize." ,
35
37
)
36
38
parser .add_argument (
37
- '-r' , '--remove-16' , action = 'store_true' , dest = 'remove_f16' ,
38
- help = 'Remove the f16 model after quantizing it.'
39
+ "-r" ,
40
+ "--remove-16" ,
41
+ action = "store_true" ,
42
+ dest = "remove_f16" ,
43
+ help = "Remove the f16 model after quantizing it." ,
39
44
)
40
45
parser .add_argument (
41
- '-m' , '--models-path' , dest = 'models_path' ,
46
+ "-m" ,
47
+ "--models-path" ,
48
+ dest = "models_path" ,
42
49
default = os .path .join (os .getcwd (), "models" ),
43
- help = ' Specify the directory where the models are located.'
50
+ help = " Specify the directory where the models are located." ,
44
51
)
45
52
parser .add_argument (
46
- '-q' , '--quantize-script-path' , dest = 'quantize_script_path' ,
53
+ "-q" ,
54
+ "--quantize-script-path" ,
55
+ dest = "quantize_script_path" ,
47
56
default = os .path .join (os .getcwd (), quantize_script_binary ),
48
- help = 'Specify the path to the "quantize" script.'
57
+ help = 'Specify the path to the "quantize" script.' ,
49
58
)
50
59
51
60
# TODO: Revise this code
@@ -75,12 +84,12 @@ def main():
75
84
)
76
85
77
86
if not os .path .isfile (f16_model_path_base ):
78
- print (f' The file %s was not found' % f16_model_path_base )
87
+ print (f" The file %s was not found" % f16_model_path_base )
79
88
sys .exit (1 )
80
89
81
90
f16_model_parts_paths = map (
82
91
lambda filename : os .path .join (f16_model_path_base , filename ),
83
- glob .glob (f"{ f16_model_path_base } *" )
92
+ glob .glob (f"{ f16_model_path_base } *" ),
84
93
)
85
94
86
95
for f16_model_part_path in f16_model_parts_paths :
@@ -93,9 +102,7 @@ def main():
93
102
)
94
103
sys .exit (1 )
95
104
96
- __run_quantize_script (
97
- args .quantize_script_path , f16_model_part_path
98
- )
105
+ __run_quantize_script (args .quantize_script_path , f16_model_part_path )
99
106
100
107
if args .remove_f16 :
101
108
os .remove (f16_model_part_path )
@@ -104,15 +111,15 @@ def main():
104
111
# This was extracted to a top-level function for parallelization, if
105
112
# implemented. See https://github.com/ggerganov/llama.cpp/pull/222/commits/f8db3d6cd91bf1a1342db9d29e3092bc12dd783c#r1140496406
106
113
114
+
107
115
def __run_quantize_script(script_path, f16_model_part_path):
    """Run the quantize script on a single f16 model part.

    The output path is derived from the input path by replacing "f16" with
    "q4_0" in the file name only. Parent directories are left untouched, so
    the quantized model is always written next to its source even when a
    directory name happens to contain "f16".

    Args:
        script_path: Path to the quantize executable.
        f16_model_part_path: Path to the f16 model part to quantize.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status (``check=True``).
    """
    # Split first so the substitution cannot rewrite directory components.
    model_dir, f16_file_name = os.path.split(f16_model_part_path)
    new_quantized_model_path = os.path.join(
        model_dir, f16_file_name.replace("f16", "q4_0")
    )

    # NOTE(review): the trailing "2" is forwarded verbatim to the script;
    # presumably it selects the q4_0 output format -- confirm against the
    # quantize tool's usage text.
    subprocess.run(
        [script_path, f16_model_part_path, new_quantized_model_path, "2"],
        check=True,
    )
117
124
118
125
0 commit comments