Skip to content

Commit cda71e9

Browse files
dbort, facebook-github-bot
authored and committed
Use flatc tool instead of bindings
Summary: C++ bindings make it a lot harder to create a pip-installable version of exir, since it would need to include prebuilt versions of the code for every architecture (or include a build system to build the bindings and flatbuffer library during install).

Instead, use the `flatc` commandline tool, which is a thin wrapper around the API we were already using.
- For internal builds, we embed `flatc` into the python package as a resource, keeping it self-contained. OSS builds could also choose to do this if they wanted to.
- If the binary is not present as a resource (like in the current OSS build), fall back to looking for `flatc` on the PATH.

Upsides:
- Lets OSS users figure out the best way to build and provide the `flatc` tool.
- Still gives us a way to provide a specific `flatc` implementation if desired, both internally and in OSS.
- Since `flatbuffers` is no longer linked into the binary, we won't run into flatbuffer versioning conflicts in bento etc.

Downsides:
- The `flatc` on the PATH may not be the version we use internally. As long as it understands our schema, it should work since the flatbuffers contract should ensure that things stay wire-compatible. But this might be something to watch.
- This now spawns a new process to generate the flatbuffer data instead of running in the python process. But given the typical size of production models, the process spawn/death overhead should be minimal relative to the rest of the export process: `time flatc` on my M1 macbook shows ~11ms wall time.

Reviewed By: mergennachin, tarun292
Differential Revision: D47074772
fbshipit-source-id: 4e0a4f19a7e78f2b9e1778b8f3dffe890119c278
1 parent 7e7cba1 commit cda71e9

File tree

2 files changed

+77
-17
lines changed

2 files changed

+77
-17
lines changed

exir/serialize/_flatbuffer.py

Lines changed: 71 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,14 @@
11
# pyre-strict
22

3+
import importlib.resources
34
import os
45
import re
6+
import subprocess
57
import tempfile
68

79
from dataclasses import dataclass
810
from typing import Callable, Dict, Optional, Sequence
911

10-
# pyre-ignore[21]: Could not find module `executorch.exir.serialize.bindings`.
11-
import executorch.exir.serialize.bindings as bindings # @manual=//executorch/exir/serialize:bindings
12-
13-
# @manual=fbsource//third-party/pypi/setuptools:setuptools
14-
import pkg_resources
15-
1612

1713
def _is_valid_alignment(alignment: int) -> bool:
1814
"""Returns True if the alignment is valid, or is None."""
@@ -94,7 +90,7 @@ def __init__(self, resource_names: Sequence[str]) -> None:
9490
# Map each name to its contents.
9591
self._files: Dict[str, bytes] = {}
9692
for name in resource_names:
97-
self._files[name] = pkg_resources.resource_string(__name__, name)
93+
self._files[name] = importlib.resources.read_binary(__package__, name)
9894

9995
def patch_files(self, patch_fn: Callable[[bytes], bytes]) -> None:
10096
"""Uses the provided patching function to update the contents of all
@@ -169,6 +165,72 @@ class _FlatbufferResult:
169165
max_alignment: int
170166

171167

168+
# Resource name under which a bundled `flatc` executable may be shipped with
# this package. The resource is optional.
_FLATC_RESOURCE_NAME: str = "flatbuffers-flatc"


def _run_flatc(args: Sequence[str]) -> None:
    """Invokes `flatc` with the given command-line arguments.

    Prefers the executable bundled as the package resource named by
    _FLATC_RESOURCE_NAME. When no such resource exists, runs whichever `flatc`
    the system path resolves to.

    Args:
        args: Arguments to pass to `flatc`, not including the program name.

    Raises:
        subprocess.CalledProcessError: If `flatc` exits with a non-zero status.
    """
    if not importlib.resources.is_resource(__package__, _FLATC_RESOURCE_NAME):
        # No bundled binary; rely on `flatc` being on the system path.
        subprocess.run(["flatc", *args], check=True)
        return

    # A bundled binary exists; run it via its filesystem path.
    with importlib.resources.path(__package__, _FLATC_RESOURCE_NAME) as bundled:
        subprocess.run([bundled, *args], check=True)
185+
186+
187+
def _flatc_compile(output_dir: str, schema_path: str, json_path: str) -> None:
    """Converts a JSON file into a binary flatbuffer file using `flatc`.

    Args:
        output_dir: Directory in which `flatc` should create the binary
            flatbuffer file.
        schema_path: Path to the flatbuffer schema describing the data. Any
            schema files it includes must live in the same directory.
        json_path: Path to the JSON input, whose structure must match the
            schema.
    """
    # Equivalent to: flatc --binary -o <output_dir> <schema_path> <json_path>
    flatc_args = ["--binary", "-o", output_dir, schema_path, json_path]
    _run_flatc(flatc_args)
207+
208+
209+
def _flatc_decompile(output_dir: str, schema_path: str, bin_path: str) -> None:
    """Converts a binary flatbuffer file into a JSON file using `flatc`.

    Args:
        output_dir: Directory in which `flatc` should create the JSON file.
        schema_path: Path to the flatbuffer schema describing the data. Any
            schema files it includes must live in the same directory.
        bin_path: Path to the binary input, which must be compatible with the
            schema.
    """
    json_options = ["--json", "--defaults-json", "--strict-json"]
    # The binary input path is passed after a `--` separator, following the
    # schema path.
    flatc_args = json_options + ["-o", output_dir, schema_path, "--", bin_path]
    _run_flatc(flatc_args)
232+
233+
172234
def _program_json_to_flatbuffer(
173235
program_json: str,
174236
*,
@@ -202,8 +264,7 @@ def _program_json_to_flatbuffer(
202264
with open(json_path, "wb") as json_file:
203265
json_file.write(program_json.encode("ascii"))
204266

205-
# pyre-ignore[16]: Module `executorch.exir.serialize` has no attribute `bindings`.
206-
bindings.flatc_compile(temp_dir, schema_info.root_path, json_path)
267+
_flatc_compile(temp_dir, schema_info.root_path, json_path)
207268
with open(output_path, "rb") as output_file:
208269
return _FlatbufferResult(
209270
data=output_file.read(), max_alignment=schema_info.max_alignment
@@ -226,7 +287,6 @@ def _program_flatbuffer_to_json(program_flatbuffer: bytes) -> bytes:
226287
with open(bin_path, "wb") as bin_file:
227288
bin_file.write(program_flatbuffer)
228289

229-
# pyre-ignore[16]: Module `executorch.exir.serialize` has no attribute `bindings`.
230-
bindings.flatc_decompile(temp_dir, schema_info.root_path, bin_path)
290+
_flatc_decompile(temp_dir, schema_info.root_path, bin_path)
231291
with open(json_path, "rb") as output_file:
232292
return output_file.read()

exir/serialize/test/test_flatbuffer.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ def make_resource_files(self, files: Dict[str, bytes]) -> _ResourceFiles:
3232
files: Mapping of filename to contents.
3333
"""
3434
with patch.object(
35-
_flatbuffer.pkg_resources, "resource_string"
36-
) as mock_resource_string:
35+
_flatbuffer.importlib.resources, "read_binary"
36+
) as mock_read_binary:
3737
# Use the fake resource files when looking up resources.
38-
mock_resource_string.side_effect = lambda _, name: files[name]
38+
mock_read_binary.side_effect = lambda _, name: files[name]
3939
return _ResourceFiles(tuple(files.keys()))
4040

4141
def test_load_and_write(self) -> None:
@@ -105,10 +105,10 @@ def call_prepare_schema(
105105
of the schema files.
106106
"""
107107
with patch.object(
108-
_flatbuffer.pkg_resources, "resource_string"
109-
) as mock_resource_string:
108+
_flatbuffer.importlib.resources, "read_binary"
109+
) as mock_read_binary:
110110
# Use the fake resource files when looking up resources.
111-
mock_resource_string.side_effect = lambda _, name: schema_files[name]
111+
mock_read_binary.side_effect = lambda _, name: schema_files[name]
112112
return _flatbuffer._prepare_schema(
113113
out_dir=out_dir,
114114
constant_tensor_alignment=constant_tensor_alignment,

0 commit comments

Comments
 (0)