Serialize constant Data outside of flatbuffer #1542

18 changes: 17 additions & 1 deletion backends/xnnpack/serialization/schema.fbs
@@ -281,6 +281,17 @@ table XNNLeakyReLU {
flags: uint;
}

// Describes data offsets for constant data
table ConstantDataOffset {
// Constant data offsets are relative to the constant data base offset provided
// in the XNNPACKHeader.
offset: uint64;

// The size in bytes of valid data starting at the offset. The constant data
// may be followed by padding before the next piece of constant data.
size: uint64;
}

table XNNGraph {
// Schema version.
version:string;
@@ -299,11 +310,16 @@ table XNNGraph {
// Tables of constant data, used for constant Values (e.g.
// data field of weight tensors). Each constant is assigned an index into the table
// which are each individually aligned. 0 index is reserved to be pointed to by non-constant
// Tensors
// Tensors. Exactly one of constant_buffer and constant_data must be non-empty.
constant_buffer:[Buffer];

// the list index is memory buffer id, the value is the memory buffer size.
mem_buffer_sizes: [uint];

// List of the constant data blobs that follow the XNNGraph in this file. Each constant is assigned an index into
// the table. Index 0 is reserved for non-constant Tensors. Exactly one of constant_buffer and
// constant_data must be non-empty.
constant_data:[ConstantDataOffset];
}

root_type XNNGraph;
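
As an aside on how these offsets are meant to be consumed (this reading code is not part of the PR): each ConstantDataOffset.offset is relative to the constant data base offset recorded in the XNNPACK header that the serializer below writes in front of the flatbuffer. A minimal Python sketch, with hypothetical names (`payload` is the serialized blob, `header` a parsed XNNHeader, `offsets` the ConstantDataOffset table):

# Illustrative sketch only; not code from this PR.
def read_constant(payload: bytes, header, offsets, idx: int) -> bytes:
    # Start of the constant data segment, taken from the XNNPACK header.
    base = header.constant_data_offset
    # ConstantDataOffset entry for constant tensor `idx`.
    entry = offsets[idx]
    # Offsets are relative to the segment base; size excludes any trailing padding.
    start = base + entry.offset
    return payload[start : start + entry.size]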
8 changes: 8 additions & 0 deletions backends/xnnpack/serialization/xnnpack_graph_schema.py
@@ -417,6 +417,12 @@ class Buffer:
storage: bytes


@dataclass
class ConstantDataOffset:
offset: int
size: int


@dataclass
class XNNGraph:
version: str
@@ -429,3 +435,5 @@ class XNNGraph:

constant_buffer: List[Buffer]
mem_buffer_sizes: List[int]

constant_data: List[ConstantDataOffset]
308 changes: 306 additions & 2 deletions backends/xnnpack/serialization/xnnpack_graph_serialize.py
@@ -8,14 +8,24 @@
import os
import tempfile

from dataclasses import fields, is_dataclass
from dataclasses import dataclass, fields, is_dataclass
from typing import ClassVar, List, Literal, Tuple

import pkg_resources
from executorch.backends.xnnpack.serialization.xnnpack_graph_schema import XNNGraph
from executorch.backends.xnnpack.serialization.xnnpack_graph_schema import (
Buffer,
ConstantDataOffset,
XNNGraph,
)
from executorch.exir._serialize._dataclass import _DataclassEncoder

from executorch.exir._serialize._flatbuffer import _flatc_compile

# Byte order of numbers written to program headers. Always little-endian
# regardless of the host system, since all commonly-used modern CPUs are little
# endian.
_HEADER_BYTEORDER: Literal["little"] = "little"


def sanity_check_xnngraph_dataclass(table, name: str = ""):
"""
@@ -68,6 +78,236 @@ def check_for_sym(obj, name):
check_for_sym(o, _name_field)


@dataclass
class XNNHeader:
# Class Constants
MAGIC_OFFSET: ClassVar[slice] = slice(4, 8)
HEADER_SIZE_OFFSET: ClassVar[slice] = slice(8, 10)
FLATBUFFER_OFFSET_OFFSET: ClassVar[slice] = slice(10, 14)
FLATBUFFER_SIZE_OFFSET: ClassVar[slice] = slice(14, 18)
CONSTANT_DATA_OFFSET_OFFSET: ClassVar[slice] = slice(18, 22)
CONSTANT_DATA_SIZE_OFFSET: ClassVar[slice] = slice(22, 30)

# magic bytes that should be at the beginning of the header
EXPECTED_MAGIC: ClassVar[bytes] = b"XH00"
# The length of the header in bytes.
EXPECTED_LENGTH: ClassVar[int] = (
# Zero padding before the magic
# We offset the magic by 4 bytes so that it is in the same location
# as the flatbuffer payload's magic. This way we can dynamically
# choose between the XNNPACK Header and Flatbuffer Header
4
# Header magic
+ 4
# Header Length
+ 2
# Flatbuffer offset
+ 4
# Flatbuffer size
+ 4
# Constant Data offset
+ 4
# Constant Data size
+ 8
)

# Instance attributes. @dataclass will turn these into ctor args.

# offset to the flatbuffer data
flatbuffer_offset: int

# flatbuffer size
flatbuffer_size: int

# offset to the constant data
constant_data_offset: int

# constant data size
constant_data_size: int

@staticmethod
def from_bytes(data: bytes) -> "XNNHeader":
"""
Converts the given bytes into an XNNHeader object.

We check that the magic and length fields are valid, but do not check that the
offset and size values themselves are valid. Callers should use is_valid() to
validate the header contents.

Args:
data: Data to read from
Returns:
XNNHeader object that contains the parsed data
Raises:
ValueError: if not enough data is provided, or if parsed length/magic are invalid
"""
if len(data) > XNNHeader.EXPECTED_LENGTH:
raise ValueError(
f"Invalid XNNHeader: expected no more than {XNNHeader.EXPECTED_LENGTH} bytes, got {len(data)}"
)

magic: bytes = data[XNNHeader.MAGIC_OFFSET]
length_bytes: bytes = data[XNNHeader.HEADER_SIZE_OFFSET]
flatbuffer_offset_bytes: bytes = data[XNNHeader.FLATBUFFER_OFFSET_OFFSET]
flatbuffer_size_bytes: bytes = data[XNNHeader.FLATBUFFER_SIZE_OFFSET]
constant_data_offset_bytes: bytes = data[XNNHeader.CONSTANT_DATA_OFFSET_OFFSET]
constant_data_size_bytes: bytes = data[XNNHeader.CONSTANT_DATA_SIZE_OFFSET]

length = int.from_bytes(length_bytes, byteorder=_HEADER_BYTEORDER)

if magic != XNNHeader.EXPECTED_MAGIC:
raise ValueError(
f"Invalid XNNHeader: invalid magic bytes {magic}, expected {XNNHeader.EXPECTED_MAGIC}"
)
if length != len(data):
raise ValueError(
f"Invalid XNNHeader: Invalid parsed length: data given was {len(data)} bytes, parsed length was {length} bytes"
)

return XNNHeader(
flatbuffer_offset=int.from_bytes(
flatbuffer_offset_bytes, byteorder=_HEADER_BYTEORDER
),
flatbuffer_size=int.from_bytes(
flatbuffer_size_bytes, byteorder=_HEADER_BYTEORDER
),
constant_data_offset=int.from_bytes(
constant_data_offset_bytes, byteorder=_HEADER_BYTEORDER
),
constant_data_size=int.from_bytes(
constant_data_size_bytes, byteorder=_HEADER_BYTEORDER
),
)

def is_valid(self) -> bool:
"""
Sanity checks the XNNHeader.

We check that the flatbuffer size is non-zero, that the constant data offset is
after the flatbuffer payload, and that the constant data size is non-negative.

Returns:
True if the XNNHeader is valid, False otherwise
"""
# flatbuffer payload must have a non-zero size
valid_flatbuffer_size = self.flatbuffer_size > 0
# constant data offset is after flatbuffer payload
valid_const_data_offset = (
self.constant_data_offset >= self.flatbuffer_offset + self.flatbuffer_size
)
valid_const_data_size = self.constant_data_size >= 0

return (
valid_flatbuffer_size and valid_const_data_offset and valid_const_data_size
)

def to_bytes(self) -> bytes:
"""
Converts XNNHeader to bytes for serialization.

Returns:
Returns the binary representation of the XNNPACK Header.
"""

# We expect the given offsets and sizes to be valid
if not self.is_valid():
raise ValueError("Invalid XNNHeader: header failed is_valid() check")

data: bytes = (
# Padding for magic bytes. This is so that the header magic is in the same position
# as the flatbuffer magic, and allows the consumer to detect whether the header is
# being used or not
b"\x00\x00\x00\x00"
# XNNPACK Header's magic. This allows the consumer to detect whether the XNNPACK
# header or the flatbuffer header is being used
+ self.EXPECTED_MAGIC
# uint16_t: Size of this header. This makes it easier to add new fields to the header
# in the future.
+ self.EXPECTED_LENGTH.to_bytes(2, byteorder=_HEADER_BYTEORDER)
# uint32_t: Offset to the start of the flatbuffer data
+ self.flatbuffer_offset.to_bytes(4, byteorder=_HEADER_BYTEORDER)
# uint32_t: Size of the flatbuffer data payload
+ self.flatbuffer_size.to_bytes(4, byteorder=_HEADER_BYTEORDER)
# uint32_t: Offset to the start of the constant data
+ self.constant_data_offset.to_bytes(4, byteorder=_HEADER_BYTEORDER)
# uint64_t: Size of the constant data
+ self.constant_data_size.to_bytes(8, byteorder=_HEADER_BYTEORDER)
)

assert len(data) == XNNHeader.EXPECTED_LENGTH

return data


def _padding_required(offset: int, alignment: int) -> int:
"""Returns the padding required to align `offset` to `alignment`."""
remainder: int = offset % alignment
if remainder != 0:
return alignment - remainder
return 0


def _aligned_size(input_size: int, alignment: int) -> int:
"""Returns input_size padded up to the next whole multiple of alignment."""
aligned_size = input_size + _padding_required(input_size, alignment)
assert aligned_size % alignment == 0
return aligned_size


def _pad_to(data: bytes, length: int) -> bytes:
"""Returns the input followed by enough zero bytes to become the requested length.

Args:
data: The data to pad.
length: The length of the returned data.
Returns:
The padded data.
Raises:
ValueError: If the requested length is less than the input length.
"""
if length < len(data):
raise ValueError(f"Data length {len(data)} > padded length {length}")
if length > len(data):
data = data + b"\x00" * (length - len(data))
assert len(data) == length
return data


def _extract_constant_data(
constant_buffer: List[Buffer],
tensor_alignment: int = 16,
) -> Tuple[bytes, List[int]]:
"""Copies the tensors from the provided list into a single buffer and tracks the offsets
of each tensor.

constant_buffer: list of Buffers from which to extract constants. Not modified.
tensor_alignment: Alignment in bytes. The starting offset of each tensor in the
constant segment will be aligned to this value. Defaults to 16.

Returns:
A tuple of (constant segment, list of offsets for each tensor in the segment)
"""
constant_segment_data: bytearray = bytearray()
constant_segment_offsets: List[int] = []
current_offset: int = 0
for i in range(len(constant_buffer)):
buffer = constant_buffer[i]
buffer_length = len(buffer.storage)
pad_length = _padding_required(buffer_length, tensor_alignment)

# Append each constant buffer to the constant segment.
constant_segment_data += buffer.storage
# Add padding for all but the last tensor.
if i < len(constant_buffer) - 1:
constant_segment_data += b"\x00" * pad_length

# Append constant data offset.
constant_segment_offsets.append(current_offset)
current_offset += buffer_length + pad_length
return bytes(constant_segment_data), constant_segment_offsets


def convert_to_flatbuffer(xnnpack_graph: XNNGraph) -> bytes:
sanity_check_xnngraph_dataclass(xnnpack_graph)
xnnpack_graph_json = json.dumps(xnnpack_graph, cls=_DataclassEncoder)
@@ -83,3 +323,67 @@ def convert_to_flatbuffer(xnnpack_graph: XNNGraph) -> bytes:
output_path = os.path.join(d, "schema.bin")
with open(output_path, "rb") as output_file:
return output_file.read()


def serialize_xnnpack_binary(xnnpack_graph: XNNGraph) -> bytes:
"""Returns the runtime binary representation of the given XNNGraph.

Args:
xnnpack_graph: XNNGraph object to serialize.

Returns:
The serialized form of the XNNGraph, ready for execution by the XNNPACK backend.
"""
constant_tensor_alignment = 16

# Extract constant data from the graph
constant_data, constant_data_offsets = _extract_constant_data(
xnnpack_graph.constant_buffer, constant_tensor_alignment
)

assert len(constant_data_offsets) == len(xnnpack_graph.mem_buffer_sizes)

for offset_idx in range(len(constant_data_offsets)):
constant_data_offset = constant_data_offsets[offset_idx]
constant_data_size = xnnpack_graph.mem_buffer_sizes[offset_idx]
xnnpack_graph.constant_data.append(
ConstantDataOffset(constant_data_offset, constant_data_size)
)

# We are moving all constant data from the graph to the constant data section.
# So we remove all constant buffers
xnnpack_graph.constant_buffer = []
xnnpack_graph.mem_buffer_sizes = []

# Convert the XNNGraph to a flatbuffer
flatbuffer_payload = convert_to_flatbuffer(xnnpack_graph)

# size of flatbuffer data, padded to be `constant_tensor_alignment` byte aligned
padded_flatbuffer_length: int = _aligned_size(
input_size=len(flatbuffer_payload),
alignment=constant_tensor_alignment,
)
# size of header to insert, padded to be `constant_tensor_alignment` byte aligned
padded_header_length: int = _aligned_size(
input_size=XNNHeader.EXPECTED_LENGTH,
alignment=constant_tensor_alignment,
)

# Create the XNNPACK Header
header: bytes = XNNHeader(
flatbuffer_offset=padded_header_length,
flatbuffer_size=len(flatbuffer_payload),
constant_data_offset=padded_header_length + padded_flatbuffer_length,
constant_data_size=len(constant_data),
).to_bytes()

# Concatenate the header, flatbuffer data, and constant data
# Constant data does not need to be padded to alignment because nothing follows it

return b"".join(
[
_pad_to(header, padded_header_length),
_pad_to(flatbuffer_payload, padded_flatbuffer_length),
constant_data,
]
)
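
A short usage sketch of the new entry point (illustrative only, assuming `graph` is a populated XNNGraph produced by the usual preprocess step): serialize the graph, then parse the header back to locate the flatbuffer and constant data segments.

# Illustrative sketch only; not code from this PR.
payload = serialize_xnnpack_binary(graph)

# Parse the fixed-length XNNPACK header written at the start of the payload.
header = XNNHeader.from_bytes(payload[: XNNHeader.EXPECTED_LENGTH])
assert header.is_valid()

# Slice out the two segments described by the header.
flatbuffer_data = payload[
    header.flatbuffer_offset : header.flatbuffer_offset + header.flatbuffer_size
]
constant_data = payload[
    header.constant_data_offset : header.constant_data_offset + header.constant_data_size
]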