Skip to content

Commit a456abd

Browse files
committed
Cleanups, better output during conversion
1 parent 4fd0635 commit a456abd

File tree

1 file changed

+103
-80
lines changed

1 file changed

+103
-80
lines changed

convert-llama-ggmlv3-to-gguf.py

Lines changed: 103 additions & 80 deletions
Original file line number | Diff line number | Diff line change
@@ -123,83 +123,103 @@ def load(self, data, offset):
123123
self.tensor_map = tensor_map
124124
return offset
125125

126-
def save_gguf(ggml_model, data, cfg):
    # Convert a loaded GGMLv3 model to a GGUF file at cfg.output.
    #
    # ggml_model: parsed GGMLv3 model (hyperparameters, vocab, tensors, tensor_map)
    # data:       raw bytes of the input file; tensor payloads are sliced from it
    # cfg:        parsed command-line config (output, gqa, eps, context_length, ...)
    hp = ggml_model.hyperparameters
    # The feed-forward length is not stored in the GGML header; recover it
    # from the shape of the first layer's feed-forward weight tensor.
    ff_tensor_idx = ggml_model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
    assert ff_tensor_idx is not None, 'Missing layer 0 FF tensor'
    ff_tensor = ggml_model.tensors[ff_tensor_idx]
    if cfg.gqa == 1:
        n_kv_head = hp.n_head
    else:
        # n_kv_head isn't stored either; brute-force the head-count divisor
        # that matches the user-supplied grouped-query-attention factor.
        gqa = float(cfg.gqa)
        n_kv_head = None
        for x in range(1, 256):
            if float(hp.n_head) / float(x) == gqa:
                n_kv_head = x
        assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
        print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
    nm = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, hp.n_layer)
    gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
    gguf_writer.add_context_length(cfg.context_length)
    gguf_writer.add_embedding_length(hp.n_embd)
    gguf_writer.add_block_count(hp.n_layer)
    gguf_writer.add_feed_forward_length(ff_tensor.dims[1])
    print('FF dim', ff_tensor.dims[1])
    gguf_writer.add_rope_dimension_count(hp.n_embd // hp.n_head)
    gguf_writer.add_head_count(hp.n_head)
    gguf_writer.add_head_count_kv(n_kv_head)
    gguf_writer.add_layer_norm_rms_eps(float(cfg.eps))
    gguf_writer.add_tokenizer_model('llama')
    tokens = []
    scores = []
    print(f'* Adding {hp.n_vocab} vocab item(s)')
    toktypes = []
    for (tokid, (vbytes, vscore)) in enumerate(ggml_model.vocab.items):
        if len(vbytes) > 1 and vbytes[0] == 32:
            # SentencePiece convention: a leading space is stored as U+2581.
            vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')
        tt = 1  # normal token
        if len(vbytes) == 0:
            tt = 3  # control token
        elif tokid >= 3 and tokid <= 258 and len(vbytes) == 1:
            # Raw byte tokens are rendered as '<0xNN>' placeholders.
            hv = hex(vbytes[0])[2:].upper()
            vbytes = bytes(f'<0x{hv}>', encoding = 'UTF-8')
            tt = 6  # byte token
        toktypes.append(tt)
        tokens.append(vbytes)
        scores.append(vscore)
    gguf_writer.add_token_list(tokens)
    gguf_writer.add_token_scores(scores)
    gguf_writer.add_token_types(toktypes)
    print('* Adding tensors')
    for tensor in ggml_model.tensors:
        name = str(tensor.name, 'UTF-8')
        if name.endswith('.weight'):
            name = name[:-7]
            suffix = '.weight'
        elif name.endswith('.bias'):
            name = name[:-5]
            suffix = '.bias'
        else:
            # FIX: 'suffix' was previously left undefined for names without a
            # recognized ending, raising a confusing NameError. Map the full
            # name as-is so the assert below reports the bad name instead.
            suffix = ''
        mapped_name = nm.get(name)
        assert mapped_name is not None, f'Bad name {name}'
        mapped_name += suffix
        # GGML stores dims in the opposite order to GGUF; swap the first two.
        tempdims = list(tensor.dims[:])
        if len(tempdims) > 1:
            temp = tempdims[1]
            tempdims[1] = tempdims[0]
            tempdims[0] = temp
        print(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}')
        gguf_writer.add_tensor(mapped_name, data[tensor.start_offset:tensor.start_offset + tensor.len_bytes], raw_shape = tempdims, raw_dtype = tensor.dtype)
    print("gguf: write header")
    gguf_writer.write_header_to_file()
    print("gguf: write metadata")
    gguf_writer.write_kv_data_to_file()
    print("gguf: write tensors")
    gguf_writer.write_tensors_to_file()
    gguf_writer.close()
126+
class GGMLToGGUF:
    """Convert a parsed GGMLv3 model into a GGUF file.

    Values the GGML format does not store explicitly (feed-forward length,
    n_kv_head) are derived at construction time; save() then writes the
    metadata, vocab and tensors out via gguf.GGUFWriter.
    """

    def __init__(self, ggml_model, data, cfg):
        # ggml_model: parsed GGMLv3 model (hyperparameters, vocab, tensors, tensor_map)
        # data:       raw bytes of the input file; tensor payloads are sliced from it
        # cfg:        parsed command-line config (output, gqa, eps, context_length, ...)
        hp = ggml_model.hyperparameters
        self.model = ggml_model
        self.data = data
        self.cfg = cfg
        # The feed-forward length is not in the GGML header; recover it from
        # the shape of the first layer's feed-forward weight tensor.
        ff_tensor_idx = ggml_model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
        assert ff_tensor_idx is not None, 'Missing layer 0 FF tensor'
        ff_tensor = ggml_model.tensors[ff_tensor_idx]
        self.ff_length = ff_tensor.dims[1]
        if cfg.gqa == 1:
            n_kv_head = hp.n_head
        else:
            # n_kv_head isn't stored either; brute-force the head-count
            # divisor matching the user-supplied GQA factor.
            gqa = float(cfg.gqa)
            n_kv_head = None
            for x in range(1, 256):
                if float(hp.n_head) / float(x) == gqa:
                    n_kv_head = x
            assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
            print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
        self.n_kv_head = n_kv_head
        self.name_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, ggml_model.hyperparameters.n_layer)

    def save(self):
        """Write the GGUF file named by cfg.output."""
        print('* Preparing to save GGUF file')
        gguf_writer = gguf.GGUFWriter(self.cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
        self.add_params(gguf_writer)
        self.add_vocab(gguf_writer)
        self.add_tensors(gguf_writer)
        print(" gguf: write header")
        gguf_writer.write_header_to_file()
        print(" gguf: write metadata")
        gguf_writer.write_kv_data_to_file()
        print(" gguf: write tensors")
        gguf_writer.write_tensors_to_file()
        gguf_writer.close()

    def add_params(self, gguf_writer):
        """Add the model hyperparameters as GGUF KV metadata items."""
        hp = self.model.hyperparameters
        cfg = self.cfg
        print('* Adding model parameters and KV items')
        gguf_writer.add_context_length(cfg.context_length)
        gguf_writer.add_embedding_length(hp.n_embd)
        gguf_writer.add_block_count(hp.n_layer)
        gguf_writer.add_feed_forward_length(self.ff_length)
        gguf_writer.add_rope_dimension_count(hp.n_embd // hp.n_head)
        gguf_writer.add_head_count(hp.n_head)
        gguf_writer.add_head_count_kv(self.n_kv_head)
        gguf_writer.add_layer_norm_rms_eps(float(cfg.eps))
        gguf_writer.add_tokenizer_model('llama')

    def add_vocab(self, gguf_writer):
        """Add the token list, scores and token types."""
        hp = self.model.hyperparameters
        tokens = []
        scores = []
        print(f'* Adding {hp.n_vocab} vocab item(s)')
        toktypes = []
        for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items):
            tt = 1  # normal token
            if len(vbytes) > 1 and vbytes[0] == 32:
                # SentencePiece convention: a leading space is stored as U+2581.
                vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')
            elif len(vbytes) == 0:
                tt = 3  # control token
            elif tokid >= 3 and tokid <= 258 and len(vbytes) == 1:
                # Raw byte tokens are rendered as '<0xNN>' placeholders.
                hv = hex(vbytes[0])[2:].upper()
                vbytes = bytes(f'<0x{hv}>', encoding = 'UTF-8')
                tt = 6  # byte token
            toktypes.append(tt)
            tokens.append(vbytes)
            scores.append(vscore)
        gguf_writer.add_token_list(tokens)
        gguf_writer.add_token_scores(scores)
        gguf_writer.add_token_types(toktypes)

    def add_tensors(self, gguf_writer):
        """Add all tensors, remapping GGML tensor names to GGUF names."""
        nm = self.name_map
        data = self.data
        print(f'* Adding {len(self.model.tensors)} tensor(s)')
        for tensor in self.model.tensors:
            name = str(tensor.name, 'UTF-8')
            if name.endswith('.weight'):
                name = name[:-7]
                suffix = '.weight'
            elif name.endswith('.bias'):
                name = name[:-5]
                suffix = '.bias'
            else:
                # FIX: 'suffix' was previously left undefined for names
                # without a recognized ending, raising a confusing NameError.
                # Map the full name as-is so the assert below reports the
                # bad name instead.
                suffix = ''
            mapped_name = nm.get(name)
            assert mapped_name is not None, f'Bad name {name}'
            mapped_name += suffix
            # GGML stores dims in the opposite order to GGUF; swap the first two.
            tempdims = list(tensor.dims[:])
            if len(tempdims) > 1:
                temp = tempdims[1]
                tempdims[1] = tempdims[0]
                tempdims[0] = temp
            # print(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}')
            gguf_writer.add_tensor(mapped_name, data[tensor.start_offset:tensor.start_offset + tensor.len_bytes], raw_shape = tempdims, raw_dtype = tensor.dtype)
222+
203223

204224
def handle_args():
205225
parser = argparse.ArgumentParser(description = 'Convert GGMLv3 models to GGUF')
@@ -212,12 +232,15 @@ def handle_args():
212232

213233
def main():
    """Entry point: parse args, scan the GGML input, write the GGUF output."""
    run_cfg = handle_args()
    print(f'* Using config: {run_cfg}')
    print('\n=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===\n')
    raw_bytes = np.memmap(run_cfg.input, mode = 'r')
    ggml_model = GGMLV3Model()
    print('* Scanning GGML input file')
    end_offset = ggml_model.load(raw_bytes, 0)
    print(ggml_model.hyperparameters)
    # Construct the converter and immediately write the output file.
    GGMLToGGUF(ggml_model, raw_bytes, run_cfg).save()
    print(f'* Successful completion. Output saved to: {run_cfg.output}')
222245

223246
main()

0 commit comments

Comments
 (0)