@@ -95,6 +95,7 @@ def get_tensors(self):
 
             with ctx as model_part:
                 for name in model_part.keys():
+                    print("yield ", name)
                     data = model_part.get_tensor(name) if self.is_safetensors else model_part[name]
                     yield name, data
 
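For context, `get_tensors()` is a generator: it opens one checkpoint shard at a time and yields `(name, tensor)` pairs lazily, so the whole model never has to sit in memory at once; the `print` added above simply traces each tensor as it is yielded. A minimal, self-contained sketch of that pattern (the `open_shard` helper and the shard contents are hypothetical stand-ins for `safetensors.safe_open` / `torch.load`):

```python
from contextlib import contextmanager

@contextmanager
def open_shard(path):
    # hypothetical stand-in for safe_open(path, ...) or torch.load(path)
    yield {"wte.weight": [[0.1, 0.2]], "norm_f.weight": [1.0]}

def get_tensors(shard_paths):
    # one shard is open at a time; tensors stream out lazily
    for path in shard_paths:
        with open_shard(path) as model_part:
            for name in model_part.keys():
                print("yield ", name)  # the trace added in this diff
                yield name, model_part[name]

for name, data in get_tensors(["model-00001-of-00002.safetensors"]):
    pass  # each (name, tensor) pair is processed as it arrives
```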
@@ -306,6 +307,54 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_clamp_kqv(self.hparams["attn_config"]["clip_qkv"])
         self.gguf_writer.add_max_alibi_bias(self.hparams["attn_config"]["alibi_bias_max"])
 
+    def write_tensors(self):
+        block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers"))
+        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+        for name, data in self.get_tensors():
+            # we don't need these
+            if name.endswith(".attention.masked_bias") or name.endswith(".attention.bias") or name.endswith(".attention.rotary_emb.inv_freq"):
+                continue
+
+            old_dtype = data.dtype
+
+            # convert any unsupported data types to float32
+            if data.dtype != torch.float16 and data.dtype != torch.float32:
+                data = data.to(torch.float32)
+
+            data = data.squeeze().numpy()
+
+            # map tensor names
+            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
+            if new_name is None:
+                print("Cannot map tensor '" + name + "'")
+                sys.exit(1)
+
+            n_dims = len(data.shape)
+            data_dtype = data.dtype
+
+            # if f32 desired, convert any float16 to float32
+            if self.ftype == 0 and data_dtype == np.float16:
+                data = data.astype(np.float32)
+
+            # TODO: Why can't we use these float16 as-is? There should be no reason to store float16 as float32
+            if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+                data = data.astype(np.float32)
+
+            # if f16 desired, convert any float32 2-dim weight tensors to float16
+            if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+                data = data.astype(np.float16)
+
+            print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
+
+            self.gguf_writer.add_tensor(new_name, data)
+
+            # note: MPT output is tied to (same as) wte in the original model;
+            # for easier implementation in llama.cpp it's duplicated in GGUF, though :/
+            if new_name == "token_embd.weight":
+                self.gguf_writer.add_tensor("output.weight", data)
+
+
+
 class BaichuanModel(Model):
     def set_vocab(self):
         from sentencepiece import SentencePieceProcessor  # type: ignore[import]
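For readers unfamiliar with `gguf.get_tensor_name_map()`: it builds a lookup from checkpoint tensor names to GGUF's architecture-neutral names, and `try_suffixes=(".weight", ".bias")` lets a single base-name entry cover both parameter kinds. A toy illustration of the idea (the regex table below is an assumption sketched for MPT-style names, not gguf's actual map):

```python
import re

# toy mapping table; real code should use gguf.get_tensor_name_map()
BASE_MAP = {
    r"transformer\.wte": "token_embd",
    r"transformer\.blocks\.(\d+)\.attn\.Wqkv": r"blk.\1.attn_qkv",
    r"transformer\.norm_f": "output_norm",
}

def get_name(name, try_suffixes=(".weight", ".bias")):
    for suffix in try_suffixes:
        if name.endswith(suffix):
            base = name[: -len(suffix)]
            for pattern, replacement in BASE_MAP.items():
                if re.fullmatch(pattern, base):
                    return re.sub(pattern, replacement, base) + suffix
    return None  # caller treats this as "cannot map tensor"

print(get_name("transformer.blocks.0.attn.Wqkv.weight"))  # blk.0.attn_qkv.weight
print(get_name("transformer.wte.weight"))                 # token_embd.weight
print(get_name("lm_head.weight"))                         # None
```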
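The dtype handling in `write_tensors()` follows the same convention as the script's other model classes: with f32 output (`self.ftype == 0`) every float tensor is stored as f32, while with f16 output (`self.ftype == 1`) only 2-dimensional `.weight` tensors are demoted to f16, and 1-dimensional tensors (norm weights, biases) are kept or promoted to f32. A small sketch of those rules in isolation (the function name is ours, not the script's):

```python
import numpy as np

def convert_for_gguf(data: np.ndarray, name: str, ftype: int) -> np.ndarray:
    # ftype 0 = f32 output, ftype 1 = f16 output
    n_dims = data.ndim
    if ftype == 0 and data.dtype == np.float16:
        return data.astype(np.float32)
    # 1-dim f16 tensors are promoted to f32 even for f16 output
    if ftype == 1 and data.dtype == np.float16 and n_dims == 1:
        return data.astype(np.float32)
    # only 2-dim ".weight" tensors are demoted to f16
    if ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
        return data.astype(np.float16)
    return data

assert convert_for_gguf(np.zeros((8, 8), np.float32), "blk.0.ffn_up.weight", 1).dtype == np.float16
assert convert_for_gguf(np.zeros(8, np.float32), "blk.0.attn_norm.weight", 1).dtype == np.float32
```

The trailing `token_embd.weight` special case exists because MPT ties its output projection to the embedding matrix; duplicating the tensor as `output.weight` in the GGUF file spares llama.cpp from having to special-case tied weights at load time.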