@@ -123,83 +123,103 @@ def load(self, data, offset):
        self.tensor_map = tensor_map
        return offset

- def save_gguf(ggml_model, data, cfg):
-     hp = ggml_model.hyperparameters
-     ff_tensor_idx = ggml_model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
-     assert ff_tensor_idx is not None, 'Missing layer 0 FF tensor'
-     ff_tensor = ggml_model.tensors[ff_tensor_idx]
-     if cfg.gqa == 1:
-         n_kv_head = hp.n_head
-     else:
-         gqa = float(cfg.gqa)
-         n_kv_head = None
-         for x in range(1, 256):
-             if float(hp.n_head) / float(x) == gqa:
-                 n_kv_head = x
-         assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
-         print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
-     nm = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, hp.n_layer)
-     gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
-     #gguf_writer.add_name('meep')
-     #gguf_writer.add_source_hf_repo('merp')
-     # gguf_writer.add_tensor_data_layout("Meta AI original pth")
-     gguf_writer.add_context_length(cfg.context_length)
-     gguf_writer.add_embedding_length(hp.n_embd)
-     gguf_writer.add_block_count(hp.n_layer)
-     gguf_writer.add_feed_forward_length(ff_tensor.dims[1])
-     print('FF dim', ff_tensor.dims[1])
-     gguf_writer.add_rope_dimension_count(hp.n_embd // hp.n_head)
-     gguf_writer.add_head_count(hp.n_head)
-     gguf_writer.add_head_count_kv(n_kv_head)
-     gguf_writer.add_layer_norm_rms_eps(float(cfg.eps))
-     gguf_writer.add_tokenizer_model('llama')
-     tokens = []
-     scores = []
-     print(f'* Adding {hp.n_vocab} vocab item(s)')
-     toktypes = []
-     for (tokid, (vbytes, vscore)) in enumerate(ggml_model.vocab.items):
-         if len(vbytes) > 1 and vbytes[0] == 32:
-             vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')
-         tt = 1
-         if len(vbytes) == 0:
-             tt = 3
-         elif tokid >= 3 and tokid <= 258 and len(vbytes) == 1:
-             hv = hex(vbytes[0])[2:].upper()
-             vbytes = bytes(f'<0x{hv}>', encoding = 'UTF-8')
-             tt = 6
-         toktypes.append(tt)
-         tokens.append(vbytes)
-         scores.append(vscore)
-     gguf_writer.add_token_list(tokens)
-     gguf_writer.add_token_scores(scores)
-     gguf_writer.add_token_types(toktypes)
-     print('* Adding tensors')
-     for tensor in ggml_model.tensors:
-         name = str(tensor.name, 'UTF-8')
-         if name.endswith('.weight'):
-             name = name[:-7]
-             suffix = '.weight'
-         elif name.endswith('.bias'):
-             name = name[:-5]
-             suffix = '.bias'
-         mapped_name = nm.get(name)
-         assert mapped_name is not None, f'Bad name {name}'
-         mapped_name += suffix
-         tempdims = list(tensor.dims[:])
-         if len(tempdims) > 1:
-             temp = tempdims[1]
-             tempdims[1] = tempdims[0]
-             tempdims[0] = temp
-         print(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}')
-         gguf_writer.add_tensor(mapped_name, data[tensor.start_offset:tensor.start_offset + tensor.len_bytes], raw_shape = tempdims, raw_dtype = tensor.dtype)
-     print("gguf: write header")
-     gguf_writer.write_header_to_file()
-     print("gguf: write metadata")
-     gguf_writer.write_kv_data_to_file()
-     print("gguf: write tensors")
-     gguf_writer.write_tensors_to_file()
-
-     gguf_writer.close()
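+ # Converts a parsed GGMLv3 model plus its raw tensor data into a GGUF file, driven by the CLI config.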
+ class GGMLToGGUF:
+     def __init__(self, ggml_model, data, cfg):
+         hp = ggml_model.hyperparameters
+         self.model = ggml_model
+         self.data = data
+         self.cfg = cfg
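+         # GGMLv3 hyperparameters don't record the feed-forward length, so infer it
+         # from the shape of layer 0's feed_forward.w1 tensor.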
+         ff_tensor_idx = ggml_model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
+         assert ff_tensor_idx is not None, 'Missing layer 0 FF tensor'
+         ff_tensor = ggml_model.tensors[ff_tensor_idx]
+         self.ff_length = ff_tensor.dims[1]
+         if cfg.gqa == 1:
+             n_kv_head = hp.n_head
+         else:
+             gqa = float(cfg.gqa)
+             n_kv_head = None
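+             # Brute-force search for the KV head count: the value whose ratio to n_head matches the requested GQA factor.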
+             for x in range(1, 256):
+                 if float(hp.n_head) / float(x) == gqa:
+                     n_kv_head = x
+             assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
+             print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
+         self.n_kv_head = n_kv_head
+         self.name_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, ggml_model.hyperparameters.n_layer)
+
+     def save(self):
+         print('* Preparing to save GGUF file')
+         gguf_writer = gguf.GGUFWriter(self.cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
+         self.add_params(gguf_writer)
+         self.add_vocab(gguf_writer)
+         self.add_tensors(gguf_writer)
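+         # GGUF layout is header, then KV metadata, then tensor data; write the sections in that order.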
+ print (" gguf: write header" )
156
+ gguf_writer .write_header_to_file ()
157
+ print (" gguf: write metadata" )
158
+ gguf_writer .write_kv_data_to_file ()
159
+ print (" gguf: write tensors" )
160
+ gguf_writer .write_tensors_to_file ()
161
+ gguf_writer .close ()
162
+
163
+ def add_params (self , gguf_writer ):
164
+ hp = self .model .hyperparameters
165
+ cfg = self .cfg
166
+ print ('* Adding model parameters and KV items' )
167
+ gguf_writer .add_context_length (cfg .context_length )
168
+ gguf_writer .add_embedding_length (hp .n_embd )
169
+ gguf_writer .add_block_count (hp .n_layer )
170
+ gguf_writer .add_feed_forward_length (self .ff_length )
171
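+         # The RoPE dimension count is the per-head embedding size.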
+         gguf_writer.add_rope_dimension_count(hp.n_embd // hp.n_head)
+         gguf_writer.add_head_count(hp.n_head)
+         gguf_writer.add_head_count_kv(self.n_kv_head)
+         gguf_writer.add_layer_norm_rms_eps(float(cfg.eps))
+         gguf_writer.add_tokenizer_model('llama')
+
+     def add_vocab(self, gguf_writer):
+         hp = self.model.hyperparameters
+         tokens = []
+         scores = []
+         print(f'* Adding {hp.n_vocab} vocab item(s)')
+         toktypes = []
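+         # Token types use the GGUF numbering: 1 = normal, 3 = control, 6 = byte. A leading space
+         # becomes U+2581 (the SentencePiece space marker), and single-byte tokens in ids 3-258 are
+         # rewritten in <0xXX> form.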
+         for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items):
+             tt = 1
+             if len(vbytes) > 1 and vbytes[0] == 32:
+                 vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')
+             elif len(vbytes) == 0:
+                 tt = 3
+             elif tokid >= 3 and tokid <= 258 and len(vbytes) == 1:
+                 hv = hex(vbytes[0])[2:].upper()
+                 vbytes = bytes(f'<0x{hv}>', encoding = 'UTF-8')
+                 tt = 6
+             toktypes.append(tt)
+             tokens.append(vbytes)
+             scores.append(vscore)
+         gguf_writer.add_token_list(tokens)
+         gguf_writer.add_token_scores(scores)
+         gguf_writer.add_token_types(toktypes)
+
+     def add_tensors(self, gguf_writer):
+         nm = self.name_map
+         data = self.data
+         print(f'* Adding {len(self.model.tensors)} tensor(s)')
+         for tensor in self.model.tensors:
+             name = str(tensor.name, 'UTF-8')
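+             # The tensor name map is keyed by base names, so strip the .weight/.bias
+             # suffix for the lookup and re-append it after mapping.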
+             if name.endswith('.weight'):
+                 name = name[:-7]
+                 suffix = '.weight'
+             elif name.endswith('.bias'):
+                 name = name[:-5]
+                 suffix = '.bias'
+             mapped_name = nm.get(name)
+             assert mapped_name is not None, f'Bad name {name}'
+             mapped_name += suffix
+             tempdims = list(tensor.dims[:])
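+             # GGML stores dimensions in the reverse of the order GGUF expects, so swap the first two dims.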
+             if len(tempdims) > 1:
+                 temp = tempdims[1]
+                 tempdims[1] = tempdims[0]
+                 tempdims[0] = temp
+             # print(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}')
+             gguf_writer.add_tensor(mapped_name, data[tensor.start_offset:tensor.start_offset + tensor.len_bytes], raw_shape = tempdims, raw_dtype = tensor.dtype)
+

def handle_args():
    parser = argparse.ArgumentParser(description = 'Convert GGMLv3 models to GGUF')
@@ -212,12 +232,15 @@ def handle_args():

def main():
    cfg = handle_args()
+     print(f'* Using config: {cfg}')
+     print('\n=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===\n')
    data = np.memmap(cfg.input, mode = 'r')
    model = GGMLV3Model()
+     print('* Scanning GGML input file')
    offset = model.load(data, 0)
    print(model.hyperparameters)
-     # print(model.vocab.items)
-     # return
-     save_gguf(model, data, cfg)
+     converter = GGMLToGGUF(model, data, cfg)
+     converter.save()
+     print(f'* Successful completion. Output saved to: {cfg.output}')

main()