Skip to content

Commit 172febf

Browse files
committed
blacked quantize
1 parent 382c0c6 commit 172febf

File tree

1 file changed

+25
-18
lines changed

1 file changed

+25
-18
lines changed

quantize.py

Lines changed: 25 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -25,27 +25,36 @@ def main():
2525
quantize_script_binary = "quantize"
2626

2727
parser = argparse.ArgumentParser(
28-
prog='python3 quantize.py',
29-
description='This script quantizes the given models by applying the '
30-
f'"{quantize_script_binary}" script on them.'
28+
prog="python3 quantize.py",
29+
description="This script quantizes the given models by applying the "
30+
f'"{quantize_script_binary}" script on them.',
3131
)
3232
parser.add_argument(
33-
'models', nargs='+', choices=('7B', '13B', '30B', '65B'),
34-
help='The models to quantize.'
33+
"models",
34+
nargs="+",
35+
choices=("7B", "13B", "30B", "65B"),
36+
help="The models to quantize.",
3537
)
3638
parser.add_argument(
37-
'-r', '--remove-16', action='store_true', dest='remove_f16',
38-
help='Remove the f16 model after quantizing it.'
39+
"-r",
40+
"--remove-16",
41+
action="store_true",
42+
dest="remove_f16",
43+
help="Remove the f16 model after quantizing it.",
3944
)
4045
parser.add_argument(
41-
'-m', '--models-path', dest='models_path',
46+
"-m",
47+
"--models-path",
48+
dest="models_path",
4249
default=os.path.join(os.getcwd(), "models"),
43-
help='Specify the directory where the models are located.'
50+
help="Specify the directory where the models are located.",
4451
)
4552
parser.add_argument(
46-
'-q', '--quantize-script-path', dest='quantize_script_path',
53+
"-q",
54+
"--quantize-script-path",
55+
dest="quantize_script_path",
4756
default=os.path.join(os.getcwd(), quantize_script_binary),
48-
help='Specify the path to the "quantize" script.'
57+
help='Specify the path to the "quantize" script.',
4958
)
5059

5160
# TODO: Revise this code
@@ -75,12 +84,12 @@ def main():
7584
)
7685

7786
if not os.path.isfile(f16_model_path_base):
78-
print(f'The file %s was not found' % f16_model_path_base)
87+
print(f"The file %s was not found" % f16_model_path_base)
7988
sys.exit(1)
8089

8190
f16_model_parts_paths = map(
8291
lambda filename: os.path.join(f16_model_path_base, filename),
83-
glob.glob(f"{f16_model_path_base}*")
92+
glob.glob(f"{f16_model_path_base}*"),
8493
)
8594

8695
for f16_model_part_path in f16_model_parts_paths:
@@ -93,9 +102,7 @@ def main():
93102
)
94103
sys.exit(1)
95104

96-
__run_quantize_script(
97-
args.quantize_script_path, f16_model_part_path
98-
)
105+
__run_quantize_script(args.quantize_script_path, f16_model_part_path)
99106

100107
if args.remove_f16:
101108
os.remove(f16_model_part_path)
@@ -104,15 +111,15 @@ def main():
104111
# This was extracted to a top-level function for parallelization, if
105112
# implemented. See https://github.com/ggerganov/llama.cpp/pull/222/commits/f8db3d6cd91bf1a1342db9d29e3092bc12dd783c#r1140496406
106113

114+
107115
def __run_quantize_script(script_path, f16_model_part_path):
    """Run the quantize script on a single f16 model part.

    The output path is derived from the input path by replacing "f16" with
    "q4_0" in the file name only, so a directory component that happens to
    contain "f16" is left untouched and the quantized file is written next
    to its source.

    Args:
        script_path: Path to the quantize executable.
        f16_model_part_path: Path to the f16 model part to quantize.

    Raises:
        subprocess.CalledProcessError: If the quantize script exits with a
            non-zero status.
    """
    model_dir, model_filename = os.path.split(f16_model_part_path)
    new_quantized_model_path = os.path.join(
        model_dir, model_filename.replace("f16", "q4_0")
    )

    # NOTE(review): "2" is forwarded verbatim as the script's last argument;
    # presumably it selects the q4_0 quantization type -- confirm against the
    # quantize tool's usage.
    subprocess.run(
        [script_path, f16_model_part_path, new_quantized_model_path, "2"],
        check=True,
    )
117124

118125

0 commit comments

Comments
 (0)