@@ -130,22 +130,44 @@ def element_count_rounded_notation(count: int) -> str:
130
130
131
131
132
132
def translate_tensor_name (name ):
133
- import re
134
- words = re .split (r"[._]" , name )
133
+ words = name .split ("." )
135
134
135
+ # Source: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#standardized-tensor-names
136
136
abbreviation_dictionary = {
137
- 'ffn' : 'Feed Forward' ,
138
- 'attn' : 'Attention' ,
139
- 'blk' : 'Block' ,
140
- 'norm' : 'Normalization' ,
141
- 'embd' : 'Embedding' ,
137
+ 'token_embd' : 'Token embedding' ,
138
+ 'pos_embd' : 'Position embedding' ,
139
+ 'output_norm' : 'Output normalization' ,
140
+ 'output' : 'Output' ,
141
+ 'attn_norm' : 'Attention normalization' ,
142
+ 'attn_norm_2' : 'Attention normalization' ,
143
+ 'attn_qkv' : 'Attention query-key-value' ,
144
+ 'attn_q' : 'Attention query' ,
145
+ 'attn_k' : 'Attention key' ,
146
+ 'attn_v' : 'Attention value' ,
147
+ 'attn_output' : 'Attention output' ,
148
+ 'ffn_norm' : 'Feed-forward network normalization' ,
149
+ 'ffn_up' : 'Feed-forward network "up"' ,
150
+ 'ffn_gate' : 'Feed-forward network "gate"' ,
151
+ 'ffn_down' : 'Feed-forward network "down"' ,
152
+ 'ffn_gate_inp' : 'Expert-routing layer for the Feed-forward network in Mixture of Expert models' ,
153
+ 'ffn_gate_exp' : 'Feed-forward network "gate" layer per expert in Mixture of Expert models' ,
154
+ 'ffn_down_exp' : 'Feed-forward network "down" layer per expert in Mixture of Expert models' ,
155
+ 'ffn_up_exp' : 'Feed-forward network "up" layer per expert in Mixture of Expert models' ,
156
+ 'ssm_in' : 'State space model input projections' ,
157
+ 'ssm_conv1d' : 'State space model rolling/shift' ,
158
+ 'ssm_x' : 'State space model selective parametrization' ,
159
+ 'ssm_a' : 'State space model state compression' ,
160
+ 'ssm_d' : 'State space model skip connection' ,
161
+ 'ssm_dt' : 'State space model time step' ,
162
+ 'ssm_out' : 'State space model output projection' ,
163
+ 'blk' : 'Block'
142
164
}
143
165
144
166
expanded_words = []
145
167
for word in words :
146
168
word_norm = word .strip ().lower ()
147
169
if word_norm in abbreviation_dictionary :
148
- expanded_words .append (abbreviation_dictionary [word_norm ])
170
+ expanded_words .append (abbreviation_dictionary [word_norm ]. title () )
149
171
else :
150
172
expanded_words .append (word .title ())
151
173
@@ -187,20 +209,22 @@ def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None
187
209
if not args .no_tensors :
188
210
# Group tensors by their prefix and maintain order
189
211
tensor_prefix_order = []
212
+ tensor_name_to_key = {}
190
213
tensor_groups = {}
191
214
total_elements = sum (tensor .n_elements for tensor in reader .tensors )
192
215
193
- for tensor in reader .tensors :
194
- tensor_name = tensor .name .replace (".weight" , "" )
195
- tensor_components = tensor_name .split ('.' )
216
+ for key , tensor in enumerate (reader .tensors ):
217
+ tensor_components = tensor .name .split ('.' )
196
218
tensor_prefix = tensor_components [0 ]
219
+
197
220
if tensor_prefix == 'blk' :
198
221
tensor_prefix = f"{ tensor_components [0 ]} .{ tensor_components [1 ]} "
199
222
200
223
if tensor_prefix not in tensor_groups :
201
224
tensor_groups [tensor_prefix ] = []
202
225
tensor_prefix_order .append (tensor_prefix )
203
226
227
+ tensor_name_to_key [tensor .name ] = key
204
228
tensor_groups [tensor_prefix ].append (tensor )
205
229
206
230
# Generate Markdown metadata
@@ -217,14 +241,13 @@ def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None
217
241
group_elements = sum (tensor .n_elements for tensor in tensors )
218
242
group_percentage = group_elements / total_elements * 100
219
243
markdown_content += f"### { translate_tensor_name (group )} Tensor Group : { element_count_rounded_notation (group_elements )} Elements <a name=\" { group .replace ('.' , '_' )} \" ></a>\n "
220
- markdown_content += "| Tensor Name | Human Friendly Name | Elements | Shape | Type |\n "
221
- markdown_content += "|----------------------| -------------------------------------|----------------|---------------------------------|------|\n "
244
+ markdown_content += "| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type |\n "
245
+ markdown_content += "|------| ---------------------------|--------------- -------------------------------------|----------------|---------------------------------|------|\n "
222
246
223
247
for tensor in tensors :
224
- tensor_name = tensor .name .replace (".weight" , "" )
225
- human_friendly_name = translate_tensor_name (tensor .name .replace (".weight" , "" ))
248
+ human_friendly_name = translate_tensor_name (tensor .name .replace (".weight" , ".(W)" ).replace (".bias" , ".(B)" ))
226
249
prettydims = ' x ' .join ('{0:^5}' .format (d ) for d in list (tensor .shape ) + [1 ] * (4 - len (tensor .shape )))
227
- markdown_content += f"| { tensor_name :20 } | { human_friendly_name :35 } | ({ element_count_rounded_notation (tensor .n_elements ):>4} ) { tensor .n_elements :7} | [{ prettydims :29} ] | { tensor .tensor_type .name :4} |\n "
250
+ markdown_content += f"| { tensor_name_to_key [ tensor . name ]:4 } | { tensor . name :25 } | { human_friendly_name :50 } | ({ element_count_rounded_notation (tensor .n_elements ):>4} ) { tensor .n_elements :7} | [{ prettydims :29} ] | { tensor .tensor_type .name :4} |\n "
228
251
markdown_content += "\n "
229
252
markdown_content += f"- Total elements in { group } : ({ element_count_rounded_notation (group_elements ):>4} ) { group_elements } \n "
230
253
markdown_content += f"- Percentage of total elements: { group_percentage :.2f} %\n "
0 commit comments