Skip to content

Commit b473285

Browse files
authored
update scripts
1 parent f119cc8 commit b473285

File tree

3 files changed

+28
-71
lines changed

3 files changed

+28
-71
lines changed

gguf-py/gguf/scripts/gguf_convert_endian.py

Lines changed: 8 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -20,22 +20,15 @@
2020

2121

2222
def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
23-
if np.uint32(1) == np.uint32(1).newbyteorder("<"):
24-
# Host is little endian
25-
host_endian = "little"
26-
swapped_endian = "big"
23+
file_endian = reader.endianess.name
24+
if reader.byte_order == 'S':
25+
host_endian = 'BIG' if file_endian == 'LITTLE' else 'LITTLE'
2726
else:
28-
# Sorry PDP or other weird systems that don't use BE or LE.
29-
host_endian = "big"
30-
swapped_endian = "little"
31-
if reader.byte_order == "S":
32-
file_endian = swapped_endian
33-
else:
34-
file_endian = host_endian
35-
order = host_endian if args.order == "native" else args.order
36-
logger.info(f"* Host is {host_endian.upper()} endian, GGUF file seems to be {file_endian.upper()} endian")
27+
host_endian = file_endian
28+
order = host_endian if args.order == "native" else args.order.upper()
29+
logger.info(f"* Host is {host_endian} endian, GGUF file seems to be {file_endian} endian")
3730
if file_endian == order:
38-
logger.info(f"* File is already {order.upper()} endian. Nothing to do.")
31+
logger.info(f"* File is already {order} endian. Nothing to do.")
3932
sys.exit(0)
4033
logger.info("* Checking tensors for conversion compatibility")
4134
for tensor in reader.tensors:
@@ -45,7 +38,7 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
4538
gguf.GGMLQuantizationType.Q8_0,
4639
):
4740
raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
48-
logger.info(f"* Preparing to convert from {file_endian.upper()} to {order.upper()}")
41+
logger.info(f"* Preparing to convert from {file_endian} to {order}")
4942
if args.dry_run:
5043
return
5144
logger.warning("*** Warning *** Warning *** Warning **")

gguf-py/gguf/scripts/gguf_dump.py

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,6 @@
99
from pathlib import Path
1010
from typing import Any
1111

12-
import numpy as np
13-
1412
# Necessary to load the local gguf package
1513
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
1614
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
@@ -21,11 +19,11 @@
2119

2220

2321
def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
24-
host_endian = 'LITTLE' if np.uint32(1) == np.uint32(1).newbyteorder("<") else 'BIG'
22+
file_endian = reader.endianess.name
2523
if reader.byte_order == 'S':
26-
file_endian = 'BIG' if host_endian == 'LITTLE' else 'LITTLE'
24+
host_endian = 'BIG' if file_endian == 'LITTLE' else 'LITTLE'
2725
else:
28-
file_endian = host_endian
26+
host_endian = file_endian
2927
return (host_endian, file_endian)
3028

3129

@@ -45,12 +43,20 @@ def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
4543
pretty_type = str(field.types[-1].name)
4644

4745
log_message = f' {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}'
48-
if len(field.types) == 1:
46+
if field.types:
4947
curr_type = field.types[0]
5048
if curr_type == GGUFValueType.STRING:
51-
log_message += ' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf-8')[:60]))
52-
elif field.types[0] in reader.gguf_scalar_to_np:
53-
log_message += ' = {0}'.format(field.parts[-1][0])
49+
content = field.contents()
50+
if len(content) > 60:
51+
content = content[:57] + '...'
52+
log_message += ' = {0}'.format(repr(content))
53+
elif curr_type in reader.gguf_scalar_to_np:
54+
log_message += ' = {0}'.format(field.contents())
55+
else:
56+
content = repr(field.contents(slice(6)))
57+
if len(field.data) > 6:
58+
content = content[:-1] + ', ...]'
59+
log_message += ' = {0}'.format(content)
5460
print(log_message) # noqa: NP100
5561
if args.no_tensors:
5662
return
@@ -82,15 +88,9 @@ def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
8288
curr["array_types"] = [t.name for t in field.types][1:]
8389
if not args.json_array:
8490
continue
85-
itype = field.types[-1]
86-
if itype == GGUFValueType.STRING:
87-
curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
88-
else:
89-
curr["value"] = [pv for idx in field.data for pv in field.parts[idx].tolist()]
90-
elif field.types[0] == GGUFValueType.STRING:
91-
curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
91+
curr["value"] = field.contents()
9292
else:
93-
curr["value"] = field.parts[-1].tolist()[0]
93+
curr["value"] = field.contents()
9494
if not args.no_tensors:
9595
for idx, tensor in enumerate(reader.tensors):
9696
tensors[tensor.name] = {

gguf-py/gguf/scripts/gguf_new_metadata.py

Lines changed: 3 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -27,45 +27,10 @@ class MetadataDetails(NamedTuple):
2727
description: str = ''
2828

2929

30-
def get_byteorder(reader: gguf.GGUFReader) -> gguf.GGUFEndian:
31-
if np.uint32(1) == np.uint32(1).newbyteorder("<"):
32-
# Host is little endian
33-
host_endian = gguf.GGUFEndian.LITTLE
34-
swapped_endian = gguf.GGUFEndian.BIG
35-
else:
36-
# Sorry PDP or other weird systems that don't use BE or LE.
37-
host_endian = gguf.GGUFEndian.BIG
38-
swapped_endian = gguf.GGUFEndian.LITTLE
39-
40-
if reader.byte_order == "S":
41-
return swapped_endian
42-
else:
43-
return host_endian
44-
45-
46-
def decode_field(field: gguf.ReaderField | None) -> Any:
47-
if field and field.types:
48-
main_type = field.types[0]
49-
50-
if main_type == gguf.GGUFValueType.ARRAY:
51-
sub_type = field.types[-1]
52-
53-
if sub_type == gguf.GGUFValueType.STRING:
54-
return [str(bytes(field.parts[idx]), encoding='utf-8') for idx in field.data]
55-
else:
56-
return [pv for idx in field.data for pv in field.parts[idx].tolist()]
57-
if main_type == gguf.GGUFValueType.STRING:
58-
return str(bytes(field.parts[-1]), encoding='utf-8')
59-
else:
60-
return field.parts[-1][0]
61-
62-
return None
63-
64-
6530
def get_field_data(reader: gguf.GGUFReader, key: str) -> Any:
6631
field = reader.get_field(key)
6732

68-
return decode_field(field)
33+
return field.contents() if field else None
6934

7035

7136
def find_token(token_list: Sequence[int], token: str) -> Sequence[int]:
@@ -93,7 +58,7 @@ def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new
9358
logger.debug(f'Removing {field.name}')
9459
continue
9560

96-
old_val = MetadataDetails(field.types[0], decode_field(field))
61+
old_val = MetadataDetails(field.types[0], field.contents())
9762
val = new_metadata.get(field.name, old_val)
9863

9964
if field.name in new_metadata:
@@ -192,7 +157,6 @@ def main() -> None:
192157
reader = gguf.GGUFReader(args.input, 'r')
193158

194159
arch = get_field_data(reader, gguf.Keys.General.ARCHITECTURE)
195-
endianess = get_byteorder(reader)
196160

197161
token_list = get_field_data(reader, gguf.Keys.Tokenizer.LIST) or []
198162

@@ -230,7 +194,7 @@ def main() -> None:
230194
sys.exit(0)
231195

232196
logger.info(f'* Writing: {args.output}')
233-
writer = gguf.GGUFWriter(args.output, arch=arch, endianess=endianess)
197+
writer = gguf.GGUFWriter(args.output, arch=arch, endianess=reader.endianess)
234198

235199
alignment = get_field_data(reader, gguf.Keys.General.ALIGNMENT)
236200
if alignment is not None:

0 commit comments

Comments
 (0)