Skip to content

Commit 842c246

Browse files
mcr229facebook-github-bot
authored andcommitted
Introduce XNNPACKHeader to manage flatbuffer data and constant data (#1523)
Summary: Introducing the XNNPACKHeader to manage the flatbuffer data and constant data. Previously, we serialized constant data along with the flatbuffer. However, with large weights and large tensors in general, converting our dataclass --> json --> flatbuffer takes a large amount of time and memory. This has become a blocker on some larger models.

To fix this, we circumvent serializing constant tensors via flatbuffer by appending the constant data after the flatbuffer payload. In order to do this, we need an XNNPACKHeader which will give us the flatbuffer offset, flatbuffer size, constant data offset, and constant data size. It will look something like this:

```
┌───────────────────────────────────┐
│XNNPACK Header                     │
├───────────────────────────────────┤
│Padding for 16 byte alignment      │
├───────────────────────────────────┤
│Flatbuffer-serialized payload data │
│                                   │
│                                   │
├───────────────────────────────────┤
│Padding for 16 byte alignment      │
├───────────────────────────────────┤
│Constant Data                      │
│                                   │
│                                   │
└───────────────────────────────────┘
```

Within the XNNPACK Header, we hold the following:
- 4 bytes to offset the header magic
- 4 bytes for the header magic
- 2 bytes for the header length
- 4 bytes for the flatbuffer offset
- 4 bytes for the flatbuffer size
- 4 bytes for constant data offset
- 8 bytes for constant data size

Reviewed By: digantdesai

Differential Revision: D52497977
1 parent 428da4f commit 842c246

File tree

3 files changed

+285
-1
lines changed

3 files changed

+285
-1
lines changed

backends/xnnpack/serialization/xnnpack_graph_serialize.py

Lines changed: 169 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,20 @@
88
import os
99
import tempfile
1010

11-
from dataclasses import fields, is_dataclass
11+
from dataclasses import dataclass, fields, is_dataclass
12+
from typing import ClassVar, Literal
1213

1314
import pkg_resources
1415
from executorch.backends.xnnpack.serialization.xnnpack_graph_schema import XNNGraph
1516
from executorch.exir._serialize._dataclass import _DataclassEncoder
1617

1718
from executorch.exir._serialize._flatbuffer import _flatc_compile
1819

20+
# Byte order of numbers written to program headers. Always little-endian
21+
# regardless of the host system, since all commonly-used modern CPUs are little
22+
# endian.
23+
_HEADER_BYTEORDER: Literal["little"] = "little"
24+
1925

2026
def sanity_check_xnngraph_dataclass(table, name: str = ""):
2127
"""
@@ -68,6 +74,168 @@ def check_for_sym(obj, name):
6874
check_for_sym(o, _name_field)
6975

7076

77+
@dataclass
class XNNHeader:
    """
    Fixed-size header describing where the flatbuffer payload and the constant
    data live inside a serialized XNNPACK blob.

    Byte layout (all integers use _HEADER_BYTEORDER):
        [0, 4)   zero padding, so the magic sits at the same position as a
                 flatbuffer payload's magic
        [4, 8)   header magic (EXPECTED_MAGIC)
        [8, 10)  header length
        [10, 14) flatbuffer offset
        [14, 18) flatbuffer size
        [18, 22) constant data offset
        [22, 30) constant data size
    """

    # Class Constants
    MAGIC_OFFSET: ClassVar[slice] = slice(4, 8)
    HEADER_SIZE_OFFSET: ClassVar[slice] = slice(8, 10)
    FLATBUFFER_OFFSET_OFFSET: ClassVar[slice] = slice(10, 14)
    FLATBUFFER_SIZE_OFFSET: ClassVar[slice] = slice(14, 18)
    CONSTANT_DATA_OFFSET_OFFSET: ClassVar[slice] = slice(18, 22)
    CONSTANT_DATA_SIZE_OFFSET: ClassVar[slice] = slice(22, 30)

    # magic bytes that should be at the beginning of the header
    EXPECTED_MAGIC: ClassVar[bytes] = b"XH00"
    # The length of the header in bytes.
    EXPECTED_LENGTH: ClassVar[int] = (
        # Zeros magic
        # We offset the magic by 4 bytes so that it is in the same location
        # as the flatbuffer payload's magic. This way we can dynamically
        # choose between the XNNPACK Header and Flatbuffer Header
        4
        # Header magic
        + 4
        # Header Length
        + 2
        # Flatbuffer offset
        + 4
        # Flatbuffer size
        + 4
        # Constant Data offset
        + 4
        # Constant Data size
        + 8
    )

    # Largest values representable by the 4-byte and 8-byte unsigned fields
    # that to_bytes() writes.
    _UINT32_MAX: ClassVar[int] = (1 << 32) - 1
    _UINT64_MAX: ClassVar[int] = (1 << 64) - 1

    # Instance attributes. @dataclass will turn these into ctor args.

    # offset to the flatbuffer data
    flatbuffer_offset: int

    # flatbuffer size
    flatbuffer_size: int

    # offset to the constant data
    constant_data_offset: int

    # constant data size
    constant_data_size: int

    @staticmethod
    def from_bytes(data: bytes) -> "XNNHeader":
        """
        Converts the given bytes into an XNNHeader object.

        Only the header metadata (size of the input, magic, and the stored
        header length) is validated here. The parsed offsets and sizes are
        not checked; callers should use is_valid() to validate the header
        contents.

        Args:
            data: Data to read from
        Returns:
            XNNHeader object that contains the parsed data
        Raises:
            ValueError: if too much or not enough data is provided, or if
                the parsed length/magic are invalid
        """
        if len(data) > XNNHeader.EXPECTED_LENGTH:
            raise ValueError(
                f"Invalid XNNHeader: expected no more than {XNNHeader.EXPECTED_LENGTH} bytes, got {len(data)}"
            )
        # Slicing never fails on short input, so without this guard a
        # truncated buffer whose length field matches its own size would be
        # parsed into a header with silently zeroed fields.
        if len(data) < XNNHeader.EXPECTED_LENGTH:
            raise ValueError(
                f"Invalid XNNHeader: expected at least {XNNHeader.EXPECTED_LENGTH} bytes, got {len(data)}"
            )

        magic: bytes = data[XNNHeader.MAGIC_OFFSET]
        length_bytes: bytes = data[XNNHeader.HEADER_SIZE_OFFSET]
        flatbuffer_offset_bytes: bytes = data[XNNHeader.FLATBUFFER_OFFSET_OFFSET]
        flatbuffer_size_bytes: bytes = data[XNNHeader.FLATBUFFER_SIZE_OFFSET]
        constant_data_offset_bytes: bytes = data[XNNHeader.CONSTANT_DATA_OFFSET_OFFSET]
        constant_data_size_bytes: bytes = data[XNNHeader.CONSTANT_DATA_SIZE_OFFSET]

        length = int.from_bytes(length_bytes, byteorder=_HEADER_BYTEORDER)

        if magic != XNNHeader.EXPECTED_MAGIC:
            raise ValueError(
                f"Invalid XNNHeader: invalid magic bytes {magic}, expected {XNNHeader.EXPECTED_MAGIC}"
            )
        if length != len(data):
            raise ValueError(
                f"Invalid XNNHeader: Invalid parsed length: data given was {len(data)} bytes, parsed length was {length} bytes"
            )

        return XNNHeader(
            flatbuffer_offset=int.from_bytes(
                flatbuffer_offset_bytes, byteorder=_HEADER_BYTEORDER
            ),
            flatbuffer_size=int.from_bytes(
                flatbuffer_size_bytes, byteorder=_HEADER_BYTEORDER
            ),
            constant_data_offset=int.from_bytes(
                constant_data_offset_bytes, byteorder=_HEADER_BYTEORDER
            ),
            constant_data_size=int.from_bytes(
                constant_data_size_bytes, byteorder=_HEADER_BYTEORDER
            ),
        )

    def is_valid(self) -> bool:
        """
        Sanity checks the XNNHeader.

        We check that the flatbuffer size is non-zero, that the constant data
        offset is at or after the end of the flatbuffer payload, and that
        every field is non-negative and small enough for the unsigned field
        width that to_bytes() writes for it (4 bytes for the offsets and the
        flatbuffer size, 8 bytes for the constant data size).

        Returns:
            True if the XNNHeader is valid, False otherwise
        """
        # flatbuffer payload must have a non-zero size
        valid_flatbuffer_size = 0 < self.flatbuffer_size <= XNNHeader._UINT32_MAX
        valid_flatbuffer_offset = 0 <= self.flatbuffer_offset <= XNNHeader._UINT32_MAX
        # constant data offset is after flatbuffer payload
        valid_const_data_offset = (
            self.flatbuffer_offset + self.flatbuffer_size
            <= self.constant_data_offset
            <= XNNHeader._UINT32_MAX
        )
        valid_const_data_size = 0 <= self.constant_data_size <= XNNHeader._UINT64_MAX

        return (
            valid_flatbuffer_size
            and valid_flatbuffer_offset
            and valid_const_data_offset
            and valid_const_data_size
        )

    def to_bytes(self) -> bytes:
        """
        Converts XNNHeader to bytes for serialization.

        Returns:
            Returns the binary representation of the XNNPACK Header.
        Raises:
            ValueError: if the header fails the is_valid() check
        """

        # We expect the given offsets and sizes to be valid
        if not self.is_valid():
            raise ValueError("Invalid XNNHeader: header failed is_valid() check")

        data: bytes = (
            # Padding for magic bytes. This is so that header magic is in the same position
            # as the flatbuffer magic, and allows consumer to detect whether the header is
            # being used or not
            b"\x00\x00\x00\x00"
            # XNNPACK Header's magic. This allows consumer to detect whether or not the header
            # is being used or the flatbuffer header is being used
            + self.EXPECTED_MAGIC
            # uint16_t: Size of this header. This makes it easier to add new fields to the header
            # in the future.
            + self.EXPECTED_LENGTH.to_bytes(2, byteorder=_HEADER_BYTEORDER)
            # uint32_t: Offset to the start of the flatbuffer data
            + self.flatbuffer_offset.to_bytes(4, byteorder=_HEADER_BYTEORDER)
            # uint32_t: Size of the flatbuffer data payload
            + self.flatbuffer_size.to_bytes(4, byteorder=_HEADER_BYTEORDER)
            # uint32_t: Offset to the start of the constant data
            + self.constant_data_offset.to_bytes(4, byteorder=_HEADER_BYTEORDER)
            # uint64_t: Size of the constant data
            + self.constant_data_size.to_bytes(8, byteorder=_HEADER_BYTEORDER)
        )

        # Internal invariant: the pieces above must add up to the declared layout.
        assert len(data) == XNNHeader.EXPECTED_LENGTH

        return data
237+
238+
71239
def convert_to_flatbuffer(xnnpack_graph: XNNGraph) -> bytes:
72240
sanity_check_xnngraph_dataclass(xnnpack_graph)
73241
xnnpack_graph_json = json.dumps(xnnpack_graph, cls=_DataclassEncoder)

backends/xnnpack/test/TARGETS

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
22
load(":targets.bzl", "define_common_targets")
33

4+
oncall("executorch")
5+
46
define_common_targets()
57

68
runtime.python_test(
@@ -60,3 +62,13 @@ runtime.python_test(
6062
"libtorch",
6163
],
6264
)
65+
66+
# Python test target for the XNNPACK serialization code. Every .py file matched
# by the serialization/ glob is used as a test source, and the target depends
# on the xnnpack_preprocess library.
runtime.python_test(
    name = "test_xnnpack_serialization",
    srcs = glob([
        "serialization/*.py",
    ]),
    deps = [
        "//executorch/backends/xnnpack:xnnpack_preprocess",
    ],
)
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
import unittest
8+
9+
from executorch.backends.xnnpack.serialization.xnnpack_graph_serialize import XNNHeader
10+
11+
# Sample field values with distinct bytes, so any byte-order or field-position
# mistake shows up in the serialized form.
EXAMPLE_FLATBUFFER_OFFSET: int = 0x11223344
EXAMPLE_FLATBUFFER_SIZE: int = 0x55667788
# The constant data starts exactly where the flatbuffer payload ends.
EXAMPLE_CONSTANT_DATA_OFFSET: int = EXAMPLE_FLATBUFFER_OFFSET + EXAMPLE_FLATBUFFER_SIZE
EXAMPLE_CONSTANT_DATA_SIZE: int = 0x99AABBCC99AABBCC

# Expected serialized form of the values above.
# The layout of the header is a contract, not an implementation detail: if the
# header layout or magic changes, this data must be updated too.
EXAMPLE_HEADER_DATA: bytes = b"".join(
    (
        # zeros
        b"\x00\x00\x00\x00",
        # magic
        b"XH00",
        # All values below are little-endian
        # header length
        b"\x1e\x00",
        # Flatbuffer Offset
        b"\x44\x33\x22\x11",
        # Flatbuffer Size
        b"\x88\x77\x66\x55",
        # Constant Data Offset
        b"\xcc\xaa\x88\x66",
        # Constant Data Size
        b"\xcc\xbb\xaa\x99\xcc\xbb\xaa\x99",
    )
)
35+
36+
37+
class TestXNNHeader(unittest.TestCase):
    """Checks XNNHeader serialization, parsing, and validation against the example header."""

    def test_to_bytes(self) -> None:
        """Serializing the example values yields exactly the example header bytes."""
        header = XNNHeader(
            EXAMPLE_FLATBUFFER_OFFSET,
            EXAMPLE_FLATBUFFER_SIZE,
            EXAMPLE_CONSTANT_DATA_OFFSET,
            EXAMPLE_CONSTANT_DATA_SIZE,
        )
        self.assertEqual(header.to_bytes(), EXAMPLE_HEADER_DATA)
        self.assertTrue(header.is_valid())

    def test_from_bytes(self) -> None:
        """Parsing the example header bytes recovers every field value."""
        header = XNNHeader.from_bytes(EXAMPLE_HEADER_DATA)
        self.assertEqual(header.flatbuffer_offset, EXAMPLE_FLATBUFFER_OFFSET)
        self.assertEqual(header.flatbuffer_size, EXAMPLE_FLATBUFFER_SIZE)
        self.assertEqual(header.constant_data_offset, EXAMPLE_CONSTANT_DATA_OFFSET)
        self.assertEqual(header.constant_data_size, EXAMPLE_CONSTANT_DATA_SIZE)

    def test_invalid_metadata(self) -> None:
        """A wrong magic, a wrong stored length, or oversized input is rejected."""
        WRONG_MAGIC_DATA = EXAMPLE_HEADER_DATA[0:4] + b"YT01" + EXAMPLE_HEADER_DATA[8:]
        with self.assertRaisesRegex(
            ValueError,
            "Invalid XNNHeader: invalid magic bytes b'YT01', expected b'XH00'",
        ):
            XNNHeader.from_bytes(WRONG_MAGIC_DATA)

        # Stored length says 29 (0x1D) while 30 bytes are supplied.
        WRONG_LENGTH_DATA = (
            EXAMPLE_HEADER_DATA[0:8] + b"\x1D\x00" + EXAMPLE_HEADER_DATA[10:]
        )
        with self.assertRaisesRegex(
            ValueError,
            "Invalid XNNHeader: Invalid parsed length: data given was 30 bytes, parsed length was 29 bytes",
        ):
            XNNHeader.from_bytes(WRONG_LENGTH_DATA)

        with self.assertRaisesRegex(
            ValueError,
            "Invalid XNNHeader: expected no more than 30 bytes, got 31",
        ):
            XNNHeader.from_bytes(EXAMPLE_HEADER_DATA + b"\x00")

    def test_invalid_flatbuffer_size(self) -> None:
        """A zero flatbuffer size makes the header unserializable."""
        header = XNNHeader(
            EXAMPLE_FLATBUFFER_OFFSET,
            0,
            EXAMPLE_CONSTANT_DATA_OFFSET,
            EXAMPLE_CONSTANT_DATA_SIZE,
        )

        with self.assertRaises(ValueError):
            header.to_bytes()

    def test_invalid_constant_data_offset(self) -> None:
        """A constant data offset that overlaps the flatbuffer payload is rejected."""
        header = XNNHeader(
            EXAMPLE_FLATBUFFER_OFFSET,
            EXAMPLE_FLATBUFFER_SIZE,
            EXAMPLE_FLATBUFFER_OFFSET + EXAMPLE_FLATBUFFER_SIZE - 1,
            EXAMPLE_CONSTANT_DATA_SIZE,
        )

        with self.assertRaises(ValueError):
            header.to_bytes()

    def test_to_bytes_same_as_from_bytes(self) -> None:
        """Parsing and re-serializing the example header round-trips byte for byte."""
        header = XNNHeader.from_bytes(EXAMPLE_HEADER_DATA)

        to_bytes = header.to_bytes()
        # assertEquals was a deprecated alias and was removed in Python 3.12.
        self.assertEqual(EXAMPLE_HEADER_DATA, to_bytes)

0 commit comments

Comments
 (0)