Skip to content

Commit 382c0c6

Browse files
committed
blacked unversioned-ggml-to-ggml
1 parent efab7f8 commit 382c0c6

File tree

1 file changed

+20
-9
lines changed

1 file changed

+20
-9
lines changed

convert-unversioned-ggml-to-ggml.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,37 +10,43 @@
1010

1111
# Hyper-parameter names stored in the ggml file header, in on-disk order.
# NOTE(review): the extra ``keys`` alias looks redundant — confirm nothing else in the file uses it.
HPARAMS = keys = ["vocab_size", "dim", "multiple_of", "n_heads", "n_layers"]
1212

13+
1314
def parse_args():
    """Build and evaluate the command-line interface.

    Returns the parsed namespace with ``dir_model`` (directory holding
    the ggml .bin files) and ``tokenizer_model`` (path to the LLaMA
    tokenizer.model file).
    """
    cli = argparse.ArgumentParser(
        description="Upgrade old ggml model files to the current format"
    )
    positional = (
        ("dir_model", "directory containing ggml .bin files"),
        ("tokenizer_model", "path to LLaMA tokenizer.model file"),
    )
    for name, help_text in positional:
        cli.add_argument(name, help=help_text)
    return cli.parse_args()
1821

22+
1923
def read_header(f_in):
    """Read the fixed-size header of an old-style ggml file.

    The header is 3 + len(HPARAMS) native-order int32 values
    (magic, the hyper-parameters, rot, ftype); returns them as a
    tuple of ints.
    """
    fmt = "i" * (3 + len(HPARAMS))
    raw = f_in.read(struct.calcsize(fmt))
    return struct.unpack(fmt, raw)
2428

29+
2530
def write_header(f_out, header):
    """Write a current-format ggml header derived from an old-style one.

    ``header`` is the 8-tuple produced by ``read_header``:
    (magic, vocab_size, dim, multiple_of, n_heads, n_layers, rot, ftype).
    Every field is kept, the magic is replaced with the new value, and a
    file-version field is inserted right after it.

    Raises Exception when the input magic is not the old-style value.
    """
    magic, vocab_size, dim, multiple_of, n_heads, n_layers, rot, ftype = header

    if magic != 0x67676D6C:
        raise Exception("Invalid file magic. Must be an old style ggml file.")

    fields = (
        0x67676D66,  # new magic ("ggmf" in ASCII)
        1,  # file format version
        vocab_size,
        dim,
        multiple_of,
        n_heads,
        n_layers,
        rot,
        ftype,
    )
    f_out.write(struct.pack("i" * len(fields), *fields))
4348

49+
4450
def write_tokens(fout, tokenizer):
4551
for i in range(tokenizer.vocab_size()):
4652
if tokenizer.is_unknown(i):
@@ -60,22 +66,25 @@ def write_tokens(fout, tokenizer):
6066
fout.write(text)
6167
fout.write(struct.pack("f", tokenizer.get_score(i)))
6268

69+
6370
def read_tokens(f_in, tokenizer):
    """Skip past the token table at the current position of ``f_in``.

    Each of the tokenizer's vocab entries is stored as a 4-byte int32
    length followed by that many bytes of token text; all of it is read
    and discarded, leaving the stream positioned after the table.
    """
    for _ in range(tokenizer.vocab_size()):
        (token_len,) = struct.unpack("i", f_in.read(4))
        f_in.read(token_len)
6875

76+
6977
def copy_all_data(f_out, f_in):
    """Copy everything remaining in ``f_in`` to ``f_out`` in 1 MiB chunks."""
    chunk_size = 1024 * 1024
    chunk = f_in.read(chunk_size)
    while chunk:
        f_out.write(chunk)
        chunk = f_in.read(chunk_size)
7583

84+
7685
def convert_one_file(path_in, tokenizer):
7786
path_tmp = f"{path_in}.tmp"
78-
path_orig= f"{path_in}.orig"
87+
path_orig = f"{path_in}.orig"
7988
print(f"converting {path_in}")
8089
with open(path_in, "rb") as f_in, open(path_tmp, "wb") as f_out:
8190
write_header(f_out, read_header(f_in))
@@ -85,6 +94,7 @@ def convert_one_file(path_in, tokenizer):
8594
os.rename(path_in, path_orig)
8695
os.rename(path_tmp, path_in)
8796

97+
8898
def main():
8999
args = parse_args()
90100
files = []
@@ -96,5 +106,6 @@ def main():
96106
for file in files:
97107
convert_one_file(file, tokenizer)
98108

109+
99110
# Run the converter only when executed as a script (not on import).
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)