@@ -197,12 +197,20 @@ def get_dtype_and_ggml_type(name, tensor, ggml_type):
 
 
 def dump_state_dict(f, ggml_type, input_dir, config):
-    weight_names = get_weight_names(config.num_hidden_layers)
     weights = {}
 
-    # First operate on meta tensors to find shapes and dtypes for GGUF header.
-    for idx, name in enumerate(weight_names):
-        weight, scales = get_weights(f"{input_dir}/tensor{idx:05}_000")
+    # Load weights in file order (mmap'ed).
+    for idx, name in enumerate(get_weight_names(config.num_hidden_layers)):
+        weights[name] = get_weights(f"{input_dir}/tensor{idx:05}_000")
+
+    logging.debug("Loaded %i files", len(weights))
+
+    # But write in layer order.
+    weight_names = get_weight_names(config.num_hidden_layers, lexicographic=False)
+
+    # Operate on meta tensors to find shapes and dtypes for GGUF header.
+    for name in weight_names:
+        weight, scales = weights[name]
         meta_tensor = convert_weight(name, weight, scales, config, device="meta")
         dtype, tensor_ggml_type = get_dtype_and_ggml_type(name, meta_tensor, ggml_type)
         quantized_meta_tensor = maybe_quantize_tensor(meta_tensor, tensor_ggml_type)
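Note on the meta-tensor pass above: calling convert_weight with device="meta" yields tensors that carry shape and dtype but no storage, so the GGUF header entries can be sized before any real weight data is materialized. A minimal sketch of the idea, assuming the tensors are plain PyTorch (illustrative only, not part of the patch):

import torch

# A "meta" tensor records shape and dtype without allocating element storage,
# so byte sizes for a file header can be computed up front at near-zero cost.
t = torch.empty((8192, 6144), dtype=torch.float16, device="meta")
print(t.shape, t.dtype, t.numel() * t.element_size())  # no weight data in memory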
@@ -213,8 +221,6 @@ def dump_state_dict(f, ggml_type, input_dir, config):
             quantized_meta_tensor.nbytes,
             tensor_ggml_type,
         )
-        weights[name] = weight, scales
-    logging.debug("Loaded %i files", len(weight_names))
 
     f.write_header_to_file()
     f.write_kv_data_to_file()
@@ -244,7 +250,7 @@ def dump_state_dict(f, ggml_type, input_dir, config):
         except NameError:
             pass
 
-    if len(tensor_info) != len(weight_names):
+    if weights:
         logging.warning("Not all tensors are converted")
 
 
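The simpler `if weights:` check reads naturally if the second (write) pass consumes entries as it converts them, e.g. via `weights.pop(name)`, so that any leftover entry marks an unconverted tensor. A tiny sketch of that pattern, with hypothetical tensor names:

# Hypothetical illustration of the "consume as you convert" bookkeeping.
pending = {"blk.0.attn_q": ..., "blk.0.attn_k": ...}
for name in list(pending):
    tensor = pending.pop(name)  # convert and write, then drop the reference
if pending:
    print("Not all tensors are converted")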
@@ -293,8 +299,10 @@ def extract_vocabulary_from_model(vocab):
     return tokens, scores, toktypes
 
 
-def get_weight_names(num_hidden_layers=64):
-    """Return Grok-1 weight names, in the order in which they are in the tensor#####_000 files."""
+def get_weight_names(num_hidden_layers=64, lexicographic=True):
+    """Return Grok-1 weight names.
+
+    If `lexicographic` is set, the order is as in the tensor#####_000 files."""
 
     weight_names = [
         gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.TOKEN_EMBD],
@@ -317,7 +325,10 @@ def get_weight_names(num_hidden_layers=64):
         )
 
     layers = [str(bid) for bid in range(64)]
-    layers.sort()  # Lexicographic sort: 0 < 1 < 10 < 11 ... < 2 < 20 < ...
+
+    if lexicographic:
+        # Lexicographic sort: 0 < 1 < 10 < 11 ... < 2 < 20 < ...
+        layers.sort()
 
     for bid in layers[:num_hidden_layers]:
         for key in layer:
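For context on the `lexicographic` flag: the checkpoint's tensor#####_000 files enumerate layers in string-sorted order, while the GGUF output should follow numeric layer order (with `lexicographic=False` the list simply keeps its natural range order). A quick illustration of the difference, using `key=int` as an equivalent numeric sort:

layers = [str(bid) for bid in range(12)]
print(sorted(layers))           # ['0', '1', '10', '11', '2', ..., '9']: file order
print(sorted(layers, key=int))  # ['0', '1', '2', ..., '10', '11']: layer order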