Commit 443ba3b

pytorchbot and lucylq authored
[executorch][serialization] Data serialization interface (#7487)
* [executorch][serialization] Data serialization interface

  Pull Request resolved: #7194

  Introduce data serialization interface.

  ghstack-source-id: 260014193
  @exported-using-ghexport
  Differential Revision: [D65947145](https://our.internmc.facebook.com/intern/diff/D65947145/)

* [executorch][serialization] Refactor flatbuffer utils into separate file (#7488)

  Pull Request resolved: #7254

  Todo: let xnnpack and vulkan serialization use these utils instead of redefining the same functions. For usage in extension/flat_tensor/serialize.

  ghstack-source-id: 260036856
  @exported-using-ghexport
  Differential Revision: [D66854756](https://our.internmc.facebook.com/intern/diff/D66854756/)

Co-authored-by: lucylq <[email protected]>
1 parent: fab1463 · commit: 443ba3b

File tree

4 files changed: 140 additions & 39 deletions


exir/_serialize/TARGETS

Lines changed: 1 addition & 0 deletions
@@ -33,6 +33,7 @@ runtime.python_library(
         "_dataclass.py",
         "_flatbuffer.py",
         "_program.py",
+        "padding.py",
     ],
     resources = {
         "//executorch/schema:program.fbs": "program.fbs",

exir/_serialize/_program.py

Lines changed: 9 additions & 39 deletions
@@ -21,6 +21,8 @@
     _program_json_to_flatbuffer,
 )

+from executorch.exir._serialize.padding import aligned_size, pad_to, padding_required
+
 from executorch.exir.schema import (
     BackendDelegateDataReference,
     BackendDelegateInlineData,
@@ -50,19 +52,6 @@ def _json_to_program(program_json: bytes) -> Program:
     return _json_to_dataclass(json.loads(program_json), cls=Program)


-def _padding_required(offset: int, alignment: int) -> int:
-    """Returns the padding required to align `offset` to `alignment`."""
-    remainder: int = offset % alignment
-    if remainder != 0:
-        return alignment - remainder
-    return 0
-
-
-def _aligned_size(input_size: int, alignment: int) -> int:
-    """Returns input_size padded up to the next whole multiple of alignment."""
-    return input_size + _padding_required(input_size, alignment)
-
-
 def _insert_flatbuffer_header(
     flatbuffer_data: bytes, magic_regex: str, header_data: bytes
 ) -> bytes:
@@ -211,25 +200,6 @@ def to_bytes(self) -> bytes:
         return data


-def _pad_to(data: bytes, length: int) -> bytes:
-    """Returns the input followed by enough zero bytes to become the requested length.
-
-    Args:
-        data: The data to pad.
-        length: The length of the returned data.
-    Returns:
-        The padded data.
-    Raises:
-        ValueError: If the requested length is less than the input length.
-    """
-    if length < len(data):
-        raise ValueError(f"Data length {len(data)} > padded length {length}")
-    if length > len(data):
-        data = data + b"\x00" * (length - len(data))
-    assert len(data) == length
-    return data
-
-
 def _get_extended_header(program_data: bytes) -> Optional[_ExtendedHeader]:
     """Returns the extended header of the program data, if present and valid."""
     try:
@@ -330,7 +300,7 @@ def _extract_constant_segment(
         constant_segment_data.append(buffer.storage)
         buffer_length = len(buffer.storage)
         pad_length = (
-            _padding_required(buffer_length, tensor_alignment)
+            padding_required(buffer_length, tensor_alignment)
             if tensor_alignment is not None
             else 0
         )
@@ -432,11 +402,11 @@ def serialize_pte_binary(
         )
         program.segments.append(
             DataSegment(
-                offset=_aligned_size(prev_end, segment_alignment), size=len(data)
+                offset=aligned_size(prev_end, segment_alignment), size=len(data)
             )
         )
         # Add to aggregate segments cord with padding.
-        padding_length = _padding_required(len(segments_data), segment_alignment)
+        padding_length = padding_required(len(segments_data), segment_alignment)
         if padding_length > 0:
             segments_data.append(b"\x00" * padding_length)
         segments_data.append(data)
@@ -454,15 +424,15 @@ def serialize_pte_binary(

     # Size of the header to insert. Its size is padded to the largest
     # force_align value present in the schema.
-    padded_header_length: int = _aligned_size(
+    padded_header_length: int = aligned_size(
         input_size=_ExtendedHeader.EXPECTED_LENGTH,
         alignment=result.max_alignment,
     )
     # Size of the program with the header inserted.
     program_size: int = padded_header_length + len(result.data)
     # Offset to the first segment, or zero if there are no segments.
     segment_base_offset: int = (
-        _aligned_size(input_size=program_size, alignment=segment_alignment)
+        aligned_size(input_size=program_size, alignment=segment_alignment)
         if len(segments_data) > 0
         else 0
     )
@@ -471,7 +441,7 @@ def serialize_pte_binary(
     header_data: bytes = _ExtendedHeader(
         program_size=program_size, segment_base_offset=segment_base_offset
     ).to_bytes()
-    header_data = _pad_to(header_data, padded_header_length)
+    header_data = pad_to(header_data, padded_header_length)

     # Insert the header into the flatbuffer data.
     program_data: bytes = _insert_flatbuffer_header(
@@ -496,7 +466,7 @@ def serialize_pte_binary(
     # - segments data (optional); aligned to segment_alignment.
     pte_data = Cord(program_data)
     if len(segments_data) > 0:
-        padding_length = _padding_required(len(pte_data), segment_alignment)
+        padding_length = padding_required(len(pte_data), segment_alignment)
         pte_data.append(b"\x00" * padding_length)
         # The first segment after program data should start at the segment base offset.
         assert (
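
The header and segment layout logic in serialize_pte_binary is unchanged by this refactor; only the helper names lose their leading underscore. A rough sketch of that arithmetic using the new padding.py helpers follows; all sizes and alignments below are made-up illustrative values, not ones taken from the schema or a real program.

from executorch.exir._serialize.padding import aligned_size, pad_to, padding_required

# Hypothetical sizes; real values come from the flatbuffer schema and the
# serialized program.
expected_header_length = 28   # assumed extended-header size
max_alignment = 16            # assumed largest force_align value in the schema
segment_alignment = 4096      # assumed segment alignment
flatbuffer_size = 10_000      # assumed size of the serialized program data

# The extended header is zero-padded up to the largest force_align value.
padded_header_length = aligned_size(expected_header_length, max_alignment)
header = pad_to(b"\x00" * expected_header_length, padded_header_length)
program_size = padded_header_length + flatbuffer_size

# The first segment begins at the next segment_alignment boundary after the
# program data, so the file is padded with padding_required(...) zero bytes.
segment_base_offset = aligned_size(program_size, segment_alignment)
assert segment_base_offset == program_size + padding_required(program_size, segment_alignment)
assert segment_base_offset % segment_alignment == 0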

exir/_serialize/data_serializer.py

Lines changed: 95 additions & 0 deletions
@@ -0,0 +1,95 @@
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Dict, List, Sequence
+
+from executorch.exir._serialize._cord import Cord
+
+from executorch.exir.schema import ScalarType
+
+
+@dataclass
+class TensorLayout:
+    """Tensor layout information for externally-serialized tensors.
+
+    Attributes:
+        scalar_type: type of the elements in the tensor.
+        sizes: size of each dim in the tensor.
+        dim_order: specifies the order the dimensions are laid out in memory,
+            from outer to inner.
+    """
+
+    scalar_type: ScalarType
+    sizes: List[int]
+    dim_order: List[int]
+
+
+@dataclass
+class TensorEntry:
+    """Represents a single tensor in `DataPayload`, specifying its location
+    and metadata.
+
+    Attributes:
+        buffer_index: The index inside `DataPayload.buffers` that this
+            TensorEntry refers to.
+        layout: Metadata about the tensor.
+    """
+
+    buffer_index: int
+    layout: TensorLayout
+
+
+@dataclass
+class DataPayload:
+    """Contains the data and metadata required for serialization.
+
+    Having an index-based arrangement instead of embedding the buffers in
+    TensorEntry allows the caller to deduplicate buffers and point multiple
+    fully qualified names (FQNs) to the same entry.
+
+    Attributes:
+        buffers: a sequence of tensor buffers.
+        fqn_to_tensor: a map from fully qualified names to serializable tensors.
+    """
+
+    buffers: Sequence[bytes]
+    fqn_to_tensor: Dict[str, TensorEntry]
+
+
+class DataSerializer(ABC):
+    """Serializes and deserializes FQN-tagged tensor data.
+
+    This base class enables serialization into different formats. See
+    executorch/extension/flat_tensor/ for an example.
+    """
+
+    @abstractmethod
+    def serialize(
+        self,
+        data: DataPayload,
+    ) -> Cord:
+        """
+        Serializes a list of tensors emitted by ExecuTorch into a binary blob.
+
+        Args:
+            data: the tensor buffers and tensor layout information required for
+                serialization.
+
+        Returns:
+            A binary blob that contains the serialized data.
+        """
+        raise NotImplementedError("serialize_data")
+
+    @abstractmethod
+    def deserialize(self, blob: Cord) -> DataPayload:
+        """
+        Deserializes a blob into a list of tensors. Reverses the effect of
+        serialize.
+
+        Args:
+            blob: A binary blob that contains the serialized data.
+
+        Returns:
+            DataPayload: tensor buffers and tensor layout information
+                deserialized from `blob`.
+        """
+        raise NotImplementedError("deserialize_data")
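
A rough sketch of how this interface is meant to be consumed follows. The FQNs, buffer sizes, and scalar types are invented for illustration (the scalar types are assumed members of ScalarType), and the concrete serializer, e.g. the one under executorch/extension/flat_tensor/, is only referenced in comments.

from executorch.exir._serialize.data_serializer import (
    DataPayload,
    TensorEntry,
    TensorLayout,
)
from executorch.exir.schema import ScalarType

# Two FQNs point at the same deduplicated buffer; a third has its own buffer.
payload = DataPayload(
    buffers=[b"\x00" * 16, b"\x01" * 8],
    fqn_to_tensor={
        "linear.weight": TensorEntry(
            buffer_index=0,
            layout=TensorLayout(ScalarType.FLOAT, sizes=[2, 2], dim_order=[0, 1]),
        ),
        "tied.weight": TensorEntry(
            buffer_index=0,  # same buffer, different FQN
            layout=TensorLayout(ScalarType.FLOAT, sizes=[2, 2], dim_order=[0, 1]),
        ),
        "linear.bias": TensorEntry(
            buffer_index=1,
            layout=TensorLayout(ScalarType.HALF, sizes=[4], dim_order=[0]),
        ),
    },
)

# A concrete DataSerializer subclass would then round-trip the payload:
#     blob = serializer.serialize(payload)       # -> Cord
#     restored = serializer.deserialize(blob)    # -> DataPayload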

exir/_serialize/padding.py

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
+
+# pyre-strict
+
+
+def pad_to(data: bytes, length: int) -> bytes:
+    """Returns the input followed by enough zero bytes to become the requested length.
+
+    Args:
+        data: The data to pad.
+        length: The length of the returned data.
+    Returns:
+        The padded data.
+    Raises:
+        ValueError: If the requested length is less than the input length.
+    """
+    if length < len(data):
+        raise ValueError(f"Data length {len(data)} > padded length {length}")
+    if length > len(data):
+        data = data + b"\x00" * (length - len(data))
+    assert len(data) == length
+    return data
+
+
+def padding_required(offset: int, alignment: int) -> int:
+    """Returns the padding required to align `offset` to `alignment`."""
+    remainder: int = offset % alignment
+    if remainder != 0:
+        return alignment - remainder
+    return 0
+
+
+def aligned_size(input_size: int, alignment: int) -> int:
+    """Returns input_size padded up to the next whole multiple of alignment."""
+    return input_size + padding_required(input_size, alignment)
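
Because these helpers are pure functions of an offset and an alignment, their behavior is easy to sanity-check in isolation; a small sketch with arbitrary numbers:

from executorch.exir._serialize.padding import aligned_size, pad_to, padding_required

# padding_required: zero bytes needed to reach the next multiple of alignment.
assert padding_required(0, 16) == 0      # already aligned
assert padding_required(5, 16) == 11
assert padding_required(16, 16) == 0

# aligned_size: the size rounded up to the next whole multiple of alignment.
assert aligned_size(5, 16) == 16
assert aligned_size(17, 16) == 32

# pad_to: zero-extends data to an exact length; a shorter target length raises.
assert pad_to(b"abc", 8) == b"abc" + b"\x00" * 5
try:
    pad_to(b"abcdef", 4)
except ValueError:
    pass  # requested length is smaller than the data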
