Serialize constant Data outside of flatbuffer

mcr229 · facebook-github-bot · commit 55f9975b562b · 2024-01-05T09:59:23.000-08:00
Summary:
We introduce the `serialize_xnnpack_binary` method, which serializes the constant data outside of the flatbuffer. It leverages the `XNNHeader` introduced in the previous diff to store offsets and sizes for both the flatbuffer payload and the constant data payload.

Note here we have not yet switched the delegate to use the new `serialize_xnnpack_binary` function as this new serialization also requires changes on the runtime side. This will be tested in the diff which follows.

Differential Revision: D52498367
diff --git a/backends/xnnpack/serialization/schema.fbs b/backends/xnnpack/serialization/schema.fbs
@@ -281,6 +281,17 @@ table XNNLeakyReLU {
   flags: uint;
 }
 
+// Describes data offsets for constant data
+table ConstantDataOffset {
+  // Constant data offsets are relative to the constant data base offset provided
+  // in the XNNPACKHeader.
+  offset: uint32;
+
+  // The size in bytes of valid data starting at the offset. The constant data
+  // may be followed by padding before the next piece of constant data
+  size: uint32;
+}
+
 table XNNGraph {
   // Schema version.
   version:string;
@@ -299,11 +310,16 @@ table XNNGraph {
   // Tables of constant data, used for constant Values (e.g.
   // data field of weight tensors). Each constant is assigned an index into the table
   // which are each individually aligned. 0 index is reserved to be pointed to by non-constant
-  // Tensors
+  // Tensors. constant_buffer and constant_data must not both be non-empty.
   constant_buffer:[Buffer];
 
   // the list index is memory buffer id, the value is the memory buffer size.
   mem_buffer_sizes: [uint];
+
+  // List of the constant data that follows the XNNGraph in this file. Each constant data is assigned an index into
+  // the table. 0 index is reserved to be pointed to by non-constant Tensors. constant_buffer and constant_data
+  // must not both be non-empty.
+  constant_data:[ConstantDataOffset];
 }
 
 root_type XNNGraph;
diff --git a/backends/xnnpack/serialization/xnnpack_graph_schema.py b/backends/xnnpack/serialization/xnnpack_graph_schema.py
@@ -417,6 +417,12 @@ class Buffer:
     storage: bytes
 
 
+@dataclass
+class ConstantDataOffset:
+    offset: int
+    size: int
+
+
 @dataclass
 class XNNGraph:
     version: str
@@ -429,3 +435,5 @@ class XNNGraph:
 
     constant_buffer: List[Buffer]
     mem_buffer_sizes: List[int]
+
+    constant_data: List[ConstantDataOffset]
diff --git a/backends/xnnpack/serialization/xnnpack_graph_serialize.py b/backends/xnnpack/serialization/xnnpack_graph_serialize.py
@@ -9,10 +9,14 @@
 import tempfile
 
 from dataclasses import dataclass, fields, is_dataclass
-from typing import ClassVar, Literal
+from typing import ClassVar, List, Literal, Tuple
 
 import pkg_resources
-from executorch.backends.xnnpack.serialization.xnnpack_graph_schema import XNNGraph
+from executorch.backends.xnnpack.serialization.xnnpack_graph_schema import (
+    Buffer,
+    ConstantDataOffset,
+    XNNGraph,
+)
 from executorch.exir._serialize._dataclass import _DataclassEncoder
 
 from executorch.exir._serialize._flatbuffer import _flatc_compile
@@ -148,6 +152,72 @@ def to_bytes(self) -> bytes:
         return data
 
 
+def _padding_required(offset: int, alignment: int) -> int:
+    """Returns the padding required to align `offset` to `alignment`."""
+    remainder: int = offset % alignment
+    if remainder != 0:
+        return alignment - remainder
+    return 0
+
+
+def _aligned_size(input_size: int, alignment: int) -> int:
+    """Returns input_size padded up to the next whole multiple of alignment."""
+    return input_size + _padding_required(input_size, alignment)
+
+
+def _pad_to(data: bytes, length: int) -> bytes:
+    """Returns the input followed by enough zero bytes to become the requested length.
+
+    Args:
+        data: The data to pad.
+        length: The length of the returned data.
+    Returns:
+        The padded data.
+    Raises:
+        ValueError: If the requested length is less than the input length.
+    """
+    if length < len(data):
+        raise ValueError(f"Data length {len(data)} > padded length {length}")
+    if length > len(data):
+        data = data + b"\x00" * (length - len(data))
+    assert len(data) == length
+    return data
+
+
+def extract_constant_data(
+    constant_buffer: List[Buffer],
+    tensor_alignment: int,
+) -> Tuple[bytes, List[int]]:
+    """Copies the tensors from the provided list into a single buffer and tracks the offsets
+    of each tensor.
+
+        constant_buffer: list of Buffers from which to extract constants. Not modified.
+        tensor_alignment: Alignment in bytes. The starting offset of each tensor in the
+            constant segment will be aligned to this value. Required; there is no default.
+
+    Returns:
+        A tuple of (constant segment, list of offsets for each tensor in the segment)
+    """
+    constant_segment_data: bytearray = bytearray()
+    constant_segment_offsets: List[int] = []
+    current_offset: int = 0
+    for i in range(len(constant_buffer)):
+        buffer = constant_buffer[i]
+        buffer_length = len(buffer.storage)
+        pad_length = _padding_required(buffer_length, tensor_alignment)
+
+        # Append each constant buffer to the constant segment.
+        constant_segment_data += buffer.storage
+        # Add padding for all but the last tensor.
+        if i < len(constant_buffer) - 1:
+            constant_segment_data += b"\x00" * pad_length
+
+        # Append constant data offset.
+        constant_segment_offsets.append(current_offset)
+        current_offset += buffer_length + pad_length
+    return bytes(constant_segment_data), constant_segment_offsets
+
+
 def convert_to_flatbuffer(xnnpack_graph: XNNGraph) -> bytes:
     sanity_check_xnngraph_dataclass(xnnpack_graph)
     xnnpack_graph_json = json.dumps(xnnpack_graph, cls=_DataclassEncoder)
@@ -163,3 +233,67 @@ def convert_to_flatbuffer(xnnpack_graph: XNNGraph) -> bytes:
         output_path = os.path.join(d, "schema.bin")
         with open(output_path, "rb") as output_file:
             return output_file.read()
+
+
+def serialize_xnnpack_binary(xnnpack_graph: XNNGraph) -> bytes:
+    """Returns the runtime binary representation of the given XNNGraph.
+
+    Args:
+        xnnpack_graph: XNNGraph object to serialize.
+
+    Returns:
+        The serialized form of the XNNGraph, ready for execution by XNNPACK Backend
+    """
+    constant_tensor_alignment = 16
+
+    # Extract constant data from the graph
+    constant_data, constant_data_offsets = extract_constant_data(
+        xnnpack_graph.constant_buffer, constant_tensor_alignment
+    )
+
+    assert len(constant_data_offsets) == len(xnnpack_graph.mem_buffer_sizes)
+
+    for offset_idx in range(len(constant_data_offsets)):
+        constant_data_offset = constant_data_offsets[offset_idx]
+        constant_data_size = xnnpack_graph.mem_buffer_sizes[offset_idx]
+        xnnpack_graph.constant_data.append(
+            ConstantDataOffset(constant_data_offset, constant_data_size)
+        )
+
+    # We are moving all constant data from the graph to the constant data section.
+    # So we clear every constant buffer and memory buffer size from the graph.
+    xnnpack_graph.constant_buffer = []
+    xnnpack_graph.mem_buffer_sizes = []
+
+    # Convert the XNNGraph to a flatbuffer
+    flatbuffer_payload = convert_to_flatbuffer(xnnpack_graph)
+
+    # size of flatbuffer data, padded to be 16 byte aligned
+    padded_flatbuffer_length: int = _aligned_size(
+        input_size=len(flatbuffer_payload),
+        alignment=constant_tensor_alignment,
+    )
+    # size of header to insert, padded to be 16 byte aligned
+    padded_header_length: int = _aligned_size(
+        input_size=XNNHeader.EXPECTED_LENGTH,
+        alignment=constant_tensor_alignment,
+    )
+
+    # Create the XNNPACK Header
+    header: bytes = XNNHeader(
+        flatbuffer_offset=padded_header_length,
+        flatbuffer_size=len(flatbuffer_payload),
+        constant_data_offset=padded_header_length + padded_flatbuffer_length,
+        constant_data_size=len(constant_data),
+    ).to_bytes()
+
+    # Concatenate the header, flatbuffer data, and constant data
+    # Constant data does not need to be padded to alignment because nothing follows it
+
+    return b"".join(
+        [
+            _pad_to(header, padded_header_length),
+            _pad_to(flatbuffer_payload, padded_flatbuffer_length),
+            constant_data,
+        ]
+    )
diff --git a/backends/xnnpack/xnnpack_preprocess.py b/backends/xnnpack/xnnpack_preprocess.py
@@ -232,6 +232,7 @@ def preprocess(
             output_ids=[],
             constant_buffer=[Buffer(storage=b"")],
             mem_buffer_sizes=[0],
+            constant_data=[],
         )
 
         node_visitors = get_node_visitors(ep, node_to_external_map)

Original file line number	Diff line number	Diff line change
`@@ -232,6 +232,7 @@ def preprocess(`
`232`	`232`	`output_ids=[],`
`233`	`233`	`constant_buffer=[Buffer(storage=b"")],`
`234`	`234`	`mem_buffer_sizes=[0],`
	`235`	`+ constant_data=[],`
`235`	`236`	`)`
`236`	`237`
`237`	`238`	`node_visitors = get_node_visitors(ep, node_to_external_map)`