update scripts

CISC · web-flow · commit b473285afc3f · 2025-02-16T13:46:46.000+01:00
diff --git a/gguf-py/gguf/scripts/gguf_convert_endian.py b/gguf-py/gguf/scripts/gguf_convert_endian.py
@@ -20,22 +20,15 @@
 
 
 def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
-    if np.uint32(1) == np.uint32(1).newbyteorder("<"):
-        # Host is little endian
-        host_endian = "little"
-        swapped_endian = "big"
+    file_endian = reader.endianess.name
+    if reader.byte_order == 'S':
+        host_endian = 'BIG' if file_endian == 'LITTLE' else 'LITTLE'
     else:
-        # Sorry PDP or other weird systems that don't use BE or LE.
-        host_endian = "big"
-        swapped_endian = "little"
-    if reader.byte_order == "S":
-        file_endian = swapped_endian
-    else:
-        file_endian = host_endian
-    order = host_endian if args.order == "native" else args.order
-    logger.info(f"* Host is {host_endian.upper()} endian, GGUF file seems to be {file_endian.upper()} endian")
+        host_endian = file_endian
+    order = host_endian if args.order == "native" else args.order.upper()
+    logger.info(f"* Host is {host_endian} endian, GGUF file seems to be {file_endian} endian")
     if file_endian == order:
-        logger.info(f"* File is already {order.upper()} endian. Nothing to do.")
+        logger.info(f"* File is already {order} endian. Nothing to do.")
         sys.exit(0)
     logger.info("* Checking tensors for conversion compatibility")
     for tensor in reader.tensors:
@@ -45,7 +38,7 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
             gguf.GGMLQuantizationType.Q8_0,
         ):
             raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
-    logger.info(f"* Preparing to convert from {file_endian.upper()} to {order.upper()}")
+    logger.info(f"* Preparing to convert from {file_endian} to {order}")
     if args.dry_run:
         return
     logger.warning("*** Warning *** Warning *** Warning **")
diff --git a/gguf-py/gguf/scripts/gguf_dump.py b/gguf-py/gguf/scripts/gguf_dump.py
@@ -9,8 +9,6 @@
 from pathlib import Path
 from typing import Any
 
-import numpy as np
-
 # Necessary to load the local gguf package
 if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
     sys.path.insert(0, str(Path(__file__).parent.parent.parent))
@@ -21,11 +19,11 @@
 
 
 def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
-    host_endian = 'LITTLE' if np.uint32(1) == np.uint32(1).newbyteorder("<") else 'BIG'
+    file_endian = reader.endianess.name
     if reader.byte_order == 'S':
-        file_endian = 'BIG' if host_endian == 'LITTLE' else 'LITTLE'
+        host_endian = 'BIG' if file_endian == 'LITTLE' else 'LITTLE'
     else:
-        file_endian = host_endian
+        host_endian = file_endian
     return (host_endian, file_endian)
 
 
@@ -45,12 +43,20 @@ def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
             pretty_type = str(field.types[-1].name)
 
         log_message = f'  {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}'
-        if len(field.types) == 1:
+        if field.types:
             curr_type = field.types[0]
             if curr_type == GGUFValueType.STRING:
-                log_message += ' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf-8')[:60]))
-            elif field.types[0] in reader.gguf_scalar_to_np:
-                log_message += ' = {0}'.format(field.parts[-1][0])
+                content = field.contents()
+                if len(content) > 60:
+                    content = content[:57] + '...'
+                log_message += ' = {0}'.format(repr(content))
+            elif curr_type in reader.gguf_scalar_to_np:
+                log_message += ' = {0}'.format(field.contents())
+            else:
+                content = repr(field.contents(slice(6)))
+                if len(field.data) > 6:
+                    content = content[:-1] + ', ...]'
+                log_message += ' = {0}'.format(content)
         print(log_message)  # noqa: NP100
     if args.no_tensors:
         return
@@ -82,15 +88,9 @@ def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
             curr["array_types"] = [t.name for t in field.types][1:]
             if not args.json_array:
                 continue
-            itype = field.types[-1]
-            if itype == GGUFValueType.STRING:
-                curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
-            else:
-                curr["value"] = [pv for idx in field.data for pv in field.parts[idx].tolist()]
-        elif field.types[0] == GGUFValueType.STRING:
-            curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
+            curr["value"] = field.contents()
         else:
-            curr["value"] = field.parts[-1].tolist()[0]
+            curr["value"] = field.contents()
     if not args.no_tensors:
         for idx, tensor in enumerate(reader.tensors):
             tensors[tensor.name] = {
diff --git a/gguf-py/gguf/scripts/gguf_new_metadata.py b/gguf-py/gguf/scripts/gguf_new_metadata.py
@@ -27,45 +27,10 @@ class MetadataDetails(NamedTuple):
     description: str = ''
 
 
-def get_byteorder(reader: gguf.GGUFReader) -> gguf.GGUFEndian:
-    if np.uint32(1) == np.uint32(1).newbyteorder("<"):
-        # Host is little endian
-        host_endian = gguf.GGUFEndian.LITTLE
-        swapped_endian = gguf.GGUFEndian.BIG
-    else:
-        # Sorry PDP or other weird systems that don't use BE or LE.
-        host_endian = gguf.GGUFEndian.BIG
-        swapped_endian = gguf.GGUFEndian.LITTLE
-
-    if reader.byte_order == "S":
-        return swapped_endian
-    else:
-        return host_endian
-
-
-def decode_field(field: gguf.ReaderField | None) -> Any:
-    if field and field.types:
-        main_type = field.types[0]
-
-        if main_type == gguf.GGUFValueType.ARRAY:
-            sub_type = field.types[-1]
-
-            if sub_type == gguf.GGUFValueType.STRING:
-                return [str(bytes(field.parts[idx]), encoding='utf-8') for idx in field.data]
-            else:
-                return [pv for idx in field.data for pv in field.parts[idx].tolist()]
-        if main_type == gguf.GGUFValueType.STRING:
-            return str(bytes(field.parts[-1]), encoding='utf-8')
-        else:
-            return field.parts[-1][0]
-
-    return None
-
-
 def get_field_data(reader: gguf.GGUFReader, key: str) -> Any:
     field = reader.get_field(key)
 
-    return decode_field(field)
+    return field.contents() if field else None
 
 
 def find_token(token_list: Sequence[int], token: str) -> Sequence[int]:
@@ -93,7 +58,7 @@ def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new
             logger.debug(f'Removing {field.name}')
             continue
 
-        old_val = MetadataDetails(field.types[0], decode_field(field))
+        old_val = MetadataDetails(field.types[0], field.contents())
         val = new_metadata.get(field.name, old_val)
 
         if field.name in new_metadata:
@@ -192,7 +157,6 @@ def main() -> None:
     reader = gguf.GGUFReader(args.input, 'r')
 
     arch = get_field_data(reader, gguf.Keys.General.ARCHITECTURE)
-    endianess = get_byteorder(reader)
 
     token_list = get_field_data(reader, gguf.Keys.Tokenizer.LIST) or []
 
@@ -230,7 +194,7 @@ def main() -> None:
             sys.exit(0)
 
     logger.info(f'* Writing: {args.output}')
-    writer = gguf.GGUFWriter(args.output, arch=arch, endianess=endianess)
+    writer = gguf.GGUFWriter(args.output, arch=arch, endianess=reader.endianess)
 
     alignment = get_field_data(reader, gguf.Keys.General.ALIGNMENT)
     if alignment is not None: