Skip to content

Commit 2bf6dc7

Browse files
committed
gguf-dump.py: markdownTableWithAlignmentSupport() added
1 parent e38b649 commit 2bf6dc7

File tree

1 file changed

+88
-15
lines changed

1 file changed

+88
-15
lines changed

gguf-py/scripts/gguf-dump.py

Lines changed: 88 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,54 @@ def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
101101
json.dump(result, sys.stdout)
102102

103103

104+
def markdownTableWithAlignmentSupport(header_map, data):
    """Render a list of row dicts as a column-aligned Markdown table.

    Args:
        header_map: Ordered column descriptors. Each descriptor is a dict
            with a 'key_name' (the key looked up in every row), a
            'header_name' (the column title) and an optional 'align' of
            'left', 'center' or 'right'. A missing or unrecognised 'align'
            pads the text like 'left' but emits a plain dash separator
            without an alignment colon.
        data: Sequence of row dicts. A row that lacks a column's key is
            rendered with an empty cell for that column.

    Returns:
        The table as a single string: a header line, a separator line and
        one line per row, each wrapped in '|' and terminated by a newline.
    """
    # JSON to Markdown table formatting: https://stackoverflow.com/a/72983854/2850957

    def strAlign(padding: int, alignMode, strVal: str) -> str:
        # Pad a cell to `padding` characters; left/right keep one space of
        # margin next to the column border.
        if alignMode == 'center':
            return strVal.center(padding)
        if alignMode == 'right':
            return strVal.rjust(padding - 1) + ' '
        # 'left' and anything unrecognised are both laid out left-aligned.
        return ' ' + strVal.ljust(padding - 1)

    def dashAlign(padding: int, alignMode) -> str:
        # Build the separator cell; the colon position encodes alignment.
        if alignMode == 'center':
            return ':' + '-' * (padding - 2) + ':'
        if alignMode == 'right':
            return '-' * (padding - 1) + ':'
        if alignMode == 'left':
            return ':' + '-' * (padding - 1)
        return '-' * padding

    # Normalise the column descriptors once: (title, row key, alignment).
    columns = [
        (str(entry['header_name']), entry['key_name'], entry.get('align'))
        for entry in header_map
    ]

    # Stringify every cell a single time. Rows without the key yield an empty
    # cell, so width calculation and rendering agree instead of the render
    # step raising KeyError.
    cell_rows = [
        [str(item[key]) if key in item else '' for _, key, _ in columns]
        for item in data
    ]

    # Column width = widest of header/cells plus one space of margin per side.
    widths = [
        max(len(title), max((len(cells[index]) for cells in cell_rows), default=0)) + 2
        for index, (title, _, _) in enumerate(columns)
    ]
    alignments = [align for _, _, align in columns]

    # Header line, separator line, then one line per data row.
    rows = [
        '|'.join(
            strAlign(width, align, title)
            for width, align, (title, _, _) in zip(widths, alignments, columns)
        ),
        '|'.join(dashAlign(width, align) for width, align in zip(widths, alignments)),
    ]
    rows.extend(
        '|'.join(
            strAlign(width, align, cell)
            for width, align, cell in zip(widths, alignments, cells)
        )
        for cells in cell_rows
    )

    return ''.join(f'|{row}|\n' for row in rows)
150+
151+
104152
def element_count_rounded_notation(count: int) -> str:
105153
if count > 1e15 :
106154
# Quadrillion
@@ -184,9 +232,7 @@ def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None
184232
markdown_content += f'There is {len(reader.fields)} key/value pair(s) in this file\n'
185233
markdown_content += '\n'
186234

187-
markdown_content += '| POS | TYPE | Elements | Key | Value |\n'
188-
markdown_content += '|-----|------------|----------|----------------------------------------|--------------------------------------------------------------------------------|\n'
189-
235+
kv_dump_table = []
190236
for n, field in enumerate(reader.fields.values(), 1):
191237
if not field.types:
192238
pretty_type = 'N/A'
@@ -216,7 +262,17 @@ def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None
216262
for element_pos in range(render_element):
217263
value += str(field.parts[-1 - element_pos][0]) + (", " if total_elements > 1 else "")
218264
value = f'[ {value}{" ..." if total_elements > 1 else ""} ]'
219-
markdown_content += f'| {n:3} | {pretty_type:10} | {total_elements:8} | {field.name:38} | {value:<78} |\n'
265+
kv_dump_table.append({"n":n, "pretty_type":pretty_type, "total_elements":total_elements, "field_name":field.name, "value":value})
266+
267+
kv_dump_table_header_map = [
268+
{'key_name':'n', 'header_name':'POS', 'align':'center'},
269+
{'key_name':'pretty_type', 'header_name':'TYPE', 'align':'left'},
270+
{'key_name':'total_elements', 'header_name':'Count', 'align':'left'},
271+
{'key_name':'field_name', 'header_name':'Key', 'align':'left'},
272+
{'key_name':'value', 'header_name':'Value', 'align':'left'},
273+
]
274+
275+
markdown_content += markdownTableWithAlignmentSupport(kv_dump_table_header_map, kv_dump_table)
220276

221277
markdown_content += "\n"
222278

@@ -227,19 +283,23 @@ def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None
227283
tensor_groups = {}
228284
total_elements = sum(tensor.n_elements for tensor in reader.tensors)
229285

286+
# Parsing Tensors Record
230287
for key, tensor in enumerate(reader.tensors):
231288
tensor_components = tensor.name.split('.')
232-
tensor_prefix = tensor_components[0]
233289

234-
if tensor_prefix == 'blk':
235-
tensor_prefix = f"{tensor_components[0]}.{tensor_components[1]}"
290+
# Classify Tensor Group
291+
tensor_group_name = "base"
292+
if tensor_components[0] == 'blk':
293+
tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}"
236294

237-
if tensor_prefix not in tensor_groups:
238-
tensor_groups[tensor_prefix] = []
239-
tensor_prefix_order.append(tensor_prefix)
295+
# Check if new Tensor Group
296+
if tensor_group_name not in tensor_groups:
297+
tensor_groups[tensor_group_name] = []
298+
tensor_prefix_order.append(tensor_group_name)
240299

300+
# Record Tensor and Tensor Position
301+
tensor_groups[tensor_group_name].append(tensor)
241302
tensor_name_to_key[tensor.name] = key
242-
tensor_groups[tensor_prefix].append(tensor)
243303

244304
# Tensors Mapping Dump
245305
markdown_content += f'## Tensors Overview {element_count_rounded_notation(total_elements)} Elements\n'
@@ -257,14 +317,27 @@ def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None
257317
tensors = tensor_groups[group]
258318
group_elements = sum(tensor.n_elements for tensor in tensors)
259319
group_percentage = group_elements / total_elements * 100
260-
markdown_content += f"### {translate_tensor_name(group)} Tensor Group : {element_count_rounded_notation(group_elements)} Elements <a name=\"{group.replace('.', '_')}\"></a>\n"
261-
markdown_content += "| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type |\n"
262-
markdown_content += "|------|---------------------------|----------------------------------------------------|----------------|---------------------------------|------|\n"
320+
markdown_content += f"### <a name=\"{group.replace('.', '_')}\">{translate_tensor_name(group)} Tensor Group : {element_count_rounded_notation(group_elements)} Elements</a>\n"
263321

322+
tensor_dump_table = []
264323
for tensor in tensors:
265324
human_friendly_name = translate_tensor_name(tensor.name.replace(".weight", ".(W)").replace(".bias", ".(B)"))
266325
prettydims = ' x '.join('{0:^5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape)))
267-
markdown_content += f"| {tensor_name_to_key[tensor.name]:4} | {tensor.name:25} | {human_friendly_name:50} | ({element_count_rounded_notation(tensor.n_elements):>4}) {tensor.n_elements:7} | [{prettydims:29}] | {tensor.tensor_type.name:4} |\n"
326+
element_count_string = f"({element_count_rounded_notation(tensor.n_elements):>4}) {tensor.n_elements:7}"
327+
type_name_string = f"{tensor.tensor_type.name}"
328+
tensor_dump_table.append({"t_id":tensor_name_to_key[tensor.name], "layer_name":tensor.name, "human_layer_name":human_friendly_name, "element_count":element_count_string, "pretty_dims":prettydims, "tensor_type":type_name_string})
329+
330+
tensor_dump_table_header_map = [
331+
{'key_name':'t_id', 'header_name':'T_ID', 'align':'center'},
332+
{'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'},
333+
{'key_name':'human_layer_name', 'header_name':'Human Friendly Tensor Layer Name', 'align':'left'},
334+
{'key_name':'element_count', 'header_name':'Elements', 'align':'left'},
335+
{'key_name':'pretty_dims', 'header_name':'Shape', 'align':'left'},
336+
{'key_name':'tensor_type', 'header_name':'Type', 'align':'left'},
337+
]
338+
339+
markdown_content += markdownTableWithAlignmentSupport(tensor_dump_table_header_map, tensor_dump_table)
340+
268341
markdown_content += "\n"
269342
markdown_content += f"- Total elements in {group}: ({element_count_rounded_notation(group_elements):>4}) {group_elements}\n"
270343
markdown_content += f"- Percentage of total elements: {group_percentage:.2f}%\n"

0 commit comments

Comments
 (0)