Serialize constant Data outside of flatbuffer #1542

18 changes: 17 additions & 1 deletion backends/xnnpack/serialization/schema.fbs
@@ -281,6 +281,17 @@ table XNNLeakyReLU {
flags: uint;
}

// Describes data offsets for constant data
table ConstantDataOffset {
// Constant data offsets are relative to the constant data base offset provided
// in the XNNPACKHeader.
offset: uint64;

// The size in bytes of valid data starting at the offset. The constant data
// may be followed by padding before the next piece of constant data.
size: uint64;
}

table XNNGraph {
// Schema version.
version:string;
@@ -299,11 +310,16 @@ table XNNGraph {
// Tables of constant data, used for constant Values (e.g.
// data field of weight tensors). Each constant is assigned an index into the table
// which are each individually aligned. 0 index is reserved to be pointed to by non-constant
// Tensors
// Tensors. Exactly one of constant_buffer and constant_data must be non-empty.
constant_buffer:[Buffer];

// the list index is memory buffer id, the value is the memory buffer size.
mem_buffer_sizes: [uint];

// List of the constant data blobs that follow the XNNGraph in this file. Each constant is assigned an index into
// the table. Index 0 is reserved for non-constant Tensors. Exactly one of constant_buffer and
// constant_data must be non-empty.
constant_data:[ConstantDataOffset];
}

root_type XNNGraph;
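
As an aside on how these offsets are meant to be consumed (this reading code is not part of the PR): each ConstantDataOffset.offset is relative to the constant data base offset recorded in the XNNPACK header that the serializer below writes in front of the flatbuffer. A minimal Python sketch, with hypothetical names (`payload` is the serialized blob, `header` a parsed XNNHeader, `offsets` the ConstantDataOffset table):

# Illustrative sketch only; not code from this PR.
def read_constant(payload: bytes, header, offsets, idx: int) -> bytes:
    # Start of the constant data segment, taken from the XNNPACK header.
    base = header.constant_data_offset
    # ConstantDataOffset entry for constant tensor `idx`.
    entry = offsets[idx]
    # Offsets are relative to the segment base; size excludes any trailing padding.
    start = base + entry.offset
    return payload[start : start + entry.size]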
8 changes: 8 additions & 0 deletions backends/xnnpack/serialization/xnnpack_graph_schema.py
@@ -417,6 +417,12 @@ class Buffer:
storage: bytes


@dataclass
class ConstantDataOffset:
offset: int
size: int


@dataclass
class XNNGraph:
version: str
@@ -429,3 +435,5 @@ class XNNGraph:

constant_buffer: List[Buffer]
mem_buffer_sizes: List[int]

constant_data: List[ConstantDataOffset]
308 changes: 306 additions & 2 deletions backends/xnnpack/serialization/xnnpack_graph_serialize.py
@@ -8,14 +8,24 @@
import os
import tempfile

from dataclasses import fields, is_dataclass
from dataclasses import dataclass, fields, is_dataclass
from typing import ClassVar, List, Literal, Tuple

import pkg_resources
from executorch.backends.xnnpack.serialization.xnnpack_graph_schema import XNNGraph
from executorch.backends.xnnpack.serialization.xnnpack_graph_schema import (
Buffer,
ConstantDataOffset,
XNNGraph,
)
from executorch.exir._serialize._dataclass import _DataclassEncoder

from executorch.exir._serialize._flatbuffer import _flatc_compile

# Byte order of numbers written to program headers. Always little-endian
# regardless of the host system, since all commonly-used modern CPUs are little
# endian.
_HEADER_BYTEORDER: Literal["little"] = "little"


def sanity_check_xnngraph_dataclass(table, name: str = ""):
"""
@@ -68,6 +78,236 @@ def check_for_sym(obj, name):
check_for_sym(o, _name_field)


@dataclass
class XNNHeader:
# Class Constants
MAGIC_OFFSET: ClassVar[slice] = slice(4, 8)
HEADER_SIZE_OFFSET: ClassVar[slice] = slice(8, 10)
FLATBUFFER_OFFSET_OFFSET: ClassVar[slice] = slice(10, 14)
FLATBUFFER_SIZE_OFFSET: ClassVar[slice] = slice(14, 18)
CONSTANT_DATA_OFFSET_OFFSET: ClassVar[slice] = slice(18, 22)
CONSTANT_DATA_SIZE_OFFSET: ClassVar[slice] = slice(22, 30)

# magic bytes that should be at the beginning of the header
EXPECTED_MAGIC: ClassVar[bytes] = b"XH00"
# The length of the header in bytes.
EXPECTED_LENGTH: ClassVar[int] = (
# Zero padding before the magic
# We offset the magic by 4 bytes so that it is in the same location
# as the flatbuffer payload's magic. This way we can dynamically
# choose between the XNNPACK Header and Flatbuffer Header
4
# Header magic
+ 4
# Header Length
+ 2
# Flatbuffer offset
+ 4
# Flatbuffer size
+ 4
# Constant Data offset
+ 4
# Constant Data size
+ 8
)

# Instance attributes. @dataclass will turn these into ctor args.

# offset to the flatbuffer data
flatbuffer_offset: int

# flatbuffer size
flatbuffer_size: int

# offset to the constant data
constant_data_offset: int

# constant data size
constant_data_size: int

@staticmethod
def from_bytes(data: bytes) -> "XNNHeader":
"""
Converts the given bytes into an XNNHeader object.

We check that the magic and length fields are valid, but do not check that the
offset and size values themselves are valid. Callers should use is_valid() to
validate the header contents.

Args:
data: Data to read from
Returns:
XNNHeader object that contains the parsed data
Raises:
ValueError: if not enough data is provided, or if parsed length/magic are invalid
"""
if len(data) > XNNHeader.EXPECTED_LENGTH:
raise ValueError(
f"Invalid XNNHeader: expected no more than {XNNHeader.EXPECTED_LENGTH} bytes, got {len(data)}"
)

magic: bytes = data[XNNHeader.MAGIC_OFFSET]
length_bytes: bytes = data[XNNHeader.HEADER_SIZE_OFFSET]
flatbuffer_offset_bytes: bytes = data[XNNHeader.FLATBUFFER_OFFSET_OFFSET]
flatbuffer_size_bytes: bytes = data[XNNHeader.FLATBUFFER_SIZE_OFFSET]
constant_data_offset_bytes: bytes = data[XNNHeader.CONSTANT_DATA_OFFSET_OFFSET]
constant_data_size_bytes: bytes = data[XNNHeader.CONSTANT_DATA_SIZE_OFFSET]

length = int.from_bytes(length_bytes, byteorder=_HEADER_BYTEORDER)

if magic != XNNHeader.EXPECTED_MAGIC:
raise ValueError(
f"Invalid XNNHeader: invalid magic bytes {magic}, expected {XNNHeader.EXPECTED_MAGIC}"
)
if length != len(data):
raise ValueError(
f"Invalid XNNHeader: Invalid parsed length: data given was {len(data)} bytes, parsed length was {length} bytes"
)

return XNNHeader(
flatbuffer_offset=int.from_bytes(
flatbuffer_offset_bytes, byteorder=_HEADER_BYTEORDER
),
flatbuffer_size=int.from_bytes(
flatbuffer_size_bytes, byteorder=_HEADER_BYTEORDER
),
constant_data_offset=int.from_bytes(
constant_data_offset_bytes, byteorder=_HEADER_BYTEORDER
),
constant_data_size=int.from_bytes(
constant_data_size_bytes, byteorder=_HEADER_BYTEORDER
),
)

def is_valid(self) -> bool:
"""
Sanity checks the XNNHeader.

We check that the flatbuffer size is non-zero, that the constant data offset is
after the flatbuffer payload, and that the constant data size is non-negative.

Returns:
True if the XNNHeader is valid, False otherwise
"""
# flatbuffer payload must have a non-zero size
valid_flatbuffer_size = self.flatbuffer_size > 0
# constant data offset is after flatbuffer payload
valid_const_data_offset = (
self.constant_data_offset >= self.flatbuffer_offset + self.flatbuffer_size
)
valid_const_data_size = self.constant_data_size >= 0

return (
valid_flatbuffer_size and valid_const_data_offset and valid_const_data_size
)

def to_bytes(self) -> bytes:
"""
Converts XNNHeader to bytes for serialization.

Returns:
Returns the binary representation of the XNNPACK Header.
"""

# We expect the given offsets and sizes to be valid
if not self.is_valid():
raise ValueError("Invalid XNNHeader: header failed is_valid() check")

data: bytes = (
# Padding for magic bytes. This is so that the header magic is in the same position
# as the flatbuffer magic, and allows the consumer to detect whether the header is
# being used or not
b"\x00\x00\x00\x00"
# XNNPACK Header's magic. This allows the consumer to detect whether the XNNPACK
# header or the flatbuffer header is being used
+ self.EXPECTED_MAGIC
# uint16_t: Size of this header. This makes it easier to add new fields to the header
# in the future.
+ self.EXPECTED_LENGTH.to_bytes(2, byteorder=_HEADER_BYTEORDER)
# uint32_t: Offset to the start of the flatbuffer data
+ self.flatbuffer_offset.to_bytes(4, byteorder=_HEADER_BYTEORDER)
# uint32_t: Size of the flatbuffer data payload
+ self.flatbuffer_size.to_bytes(4, byteorder=_HEADER_BYTEORDER)
# uint32_t: Offset to the start of the constant data
+ self.constant_data_offset.to_bytes(4, byteorder=_HEADER_BYTEORDER)
# uint64_t: Size of the constant data
+ self.constant_data_size.to_bytes(8, byteorder=_HEADER_BYTEORDER)
)

assert len(data) == XNNHeader.EXPECTED_LENGTH

return data


def _padding_required(offset: int, alignment: int) -> int:
"""Returns the padding required to align `offset` to `alignment`."""
remainder: int = offset % alignment
if remainder != 0:
return alignment - remainder
return 0


def _aligned_size(input_size: int, alignment: int) -> int:
"""Returns input_size padded up to the next whole multiple of alignment."""
aligned_size = input_size + _padding_required(input_size, alignment)
assert aligned_size % alignment == 0
return aligned_size


def _pad_to(data: bytes, length: int) -> bytes:
"""Returns the input followed by enough zero bytes to become the requested length.

Args:
data: The data to pad.
length: The length of the returned data.
Returns:
The padded data.
Raises:
ValueError: If the requested length is less than the input length.
"""
if length < len(data):
raise ValueError(f"Data length {len(data)} > padded length {length}")
if length > len(data):
data = data + b"\x00" * (length - len(data))
assert len(data) == length
return data


def _extract_constant_data(
constant_buffer: List[Buffer],
tensor_alignment: int = 16,
) -> Tuple[bytes, List[int]]:
"""Copies the tensors from the provided list into a single buffer and tracks the offsets
of each tensor.

constant_buffer: list of Buffers from which to extract constants. Not modified.
tensor_alignment: Alignment in bytes. The starting offset of each tensor in the
constant segment will be aligned to this value. Defaults to 16.

Returns:
A tuple of (constant segment, list of offsets for each tensor in the segment)
"""
constant_segment_data: bytearray = bytearray()
constant_segment_offsets: List[int] = []
current_offset: int = 0
for i in range(len(constant_buffer)):
buffer = constant_buffer[i]
buffer_length = len(buffer.storage)
pad_length = _padding_required(buffer_length, tensor_alignment)

# Append each constant buffer to the constant segment.
constant_segment_data += buffer.storage
# Add padding for all but the last tensor.
if i < len(constant_buffer) - 1:
constant_segment_data += b"\x00" * pad_length

# Append constant data offset.
constant_segment_offsets.append(current_offset)
current_offset += buffer_length + pad_length
return bytes(constant_segment_data), constant_segment_offsets


def convert_to_flatbuffer(xnnpack_graph: XNNGraph) -> bytes:
sanity_check_xnngraph_dataclass(xnnpack_graph)
xnnpack_graph_json = json.dumps(xnnpack_graph, cls=_DataclassEncoder)
@@ -83,3 +323,67 @@ def convert_to_flatbuffer(xnnpack_graph: XNNGraph) -> bytes:
output_path = os.path.join(d, "schema.bin")
with open(output_path, "rb") as output_file:
return output_file.read()


def serialize_xnnpack_binary(xnnpack_graph: XNNGraph) -> bytes:
"""Returns the runtime binary representation of the given XNNGraph.

Args:
xnnpack_graph: XNNGraph object to serialize.

Returns:
The serialized form of the XNNGraph, ready for execution by the XNNPACK backend.
"""
constant_tensor_alignment = 16

# Extract constant data from the graph
constant_data, constant_data_offsets = _extract_constant_data(
xnnpack_graph.constant_buffer, constant_tensor_alignment
)

assert len(constant_data_offsets) == len(xnnpack_graph.mem_buffer_sizes)

for offset_idx in range(len(constant_data_offsets)):
constant_data_offset = constant_data_offsets[offset_idx]
constant_data_size = xnnpack_graph.mem_buffer_sizes[offset_idx]
xnnpack_graph.constant_data.append(
ConstantDataOffset(constant_data_offset, constant_data_size)
)

# We are moving all constant data from the graph to the constant data section.
# So we remove all constant buffers
xnnpack_graph.constant_buffer = []
xnnpack_graph.mem_buffer_sizes = []

# Convert the XNNGraph to a flatbuffer
flatbuffer_payload = convert_to_flatbuffer(xnnpack_graph)

# size of flatbuffer data, padded to be `constant_tensor_alignment` byte aligned
padded_flatbuffer_length: int = _aligned_size(
input_size=len(flatbuffer_payload),
alignment=constant_tensor_alignment,
)
# size of header to insert, padded to be `constant_tensor_alignment` byte aligned
padded_header_length: int = _aligned_size(
input_size=XNNHeader.EXPECTED_LENGTH,
alignment=constant_tensor_alignment,
)

# Create the XNNPACK Header
header: bytes = XNNHeader(
flatbuffer_offset=padded_header_length,
flatbuffer_size=len(flatbuffer_payload),
constant_data_offset=padded_header_length + padded_flatbuffer_length,
constant_data_size=len(constant_data),
).to_bytes()

# Concatenate the header, flatbuffer data, and constant data
# Constant data does not need to be padded to alignment because nothing follows it

return b"".join(
[
_pad_to(header, padded_header_length),
_pad_to(flatbuffer_payload, padded_flatbuffer_length),
constant_data,
]
)
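
A short usage sketch of the new entry point (illustrative only, assuming `graph` is a populated XNNGraph produced by the usual preprocess step): serialize the graph, then parse the header back to locate the flatbuffer and constant data segments.

# Illustrative sketch only; not code from this PR.
payload = serialize_xnnpack_binary(graph)

# Parse the fixed-length XNNPACK header written at the start of the payload.
header = XNNHeader.from_bytes(payload[: XNNHeader.EXPECTED_LENGTH])
assert header.is_valid()

# Slice out the two segments described by the header.
flatbuffer_data = payload[
    header.flatbuffer_offset : header.flatbuffer_offset + header.flatbuffer_size
]
constant_data = payload[
    header.constant_data_offset : header.constant_data_offset + header.constant_data_size
]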