Commit 4a2ae4c

mcr229 authored and facebook-github-bot committed
example (#64)
Summary: Pull Request resolved: #64 Adding export example for XNNPACK delegated models, also adding to executor runner to run Differential Revision: D48371417 fbshipit-source-id: b5a4c3b341e0e3e1607a6ac16bf82d0e1ccaac5c
1 parent 4af012e commit 4a2ae4c
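
In outline, the new script exports a model, delegates the XNNPACK-supported subgraphs to the backend, and serializes the result to a .pte flatbuffer that the (also new) xnn_executor_runner can load. The following is a condensed sketch assembled from the APIs that appear in the diff below, not extra code from the commit; the import path for the examples package and the output filename are assumptions.

from executorch.examples.models import MODEL_NAME_TO_MODEL  # assumed import path for the examples package

import executorch.exir as exir
from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
    XnnpackFloatingPointPartitioner,
)
from executorch.exir.backend.backend_api import to_backend, validation_disabled

# pick one of the example models ("add" is among the supported names)
model, example_inputs = MODEL_NAME_TO_MODEL["add"]()

# capture the eager model and convert it to the Edge dialect
edge = exir.capture(
    model.eval(), example_inputs, exir.CaptureConfig(enable_aot=True, _unlift=True)
).to_edge(exir.EdgeCompileConfig(_check_ir_validity=False))

# delegate the supported subgraphs to the XNNPACK backend
with validation_disabled():
    edge.exported_program = to_backend(edge.exported_program, XnnpackFloatingPointPartitioner)

# serialize to a .pte flatbuffer for the executor runner
with open("xnnpack_add.pte", "wb") as f:  # illustrative filename
    f.write(edge.to_executorch().buffer)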

File tree: 3 files changed, +126 -0 lines

examples/backend/TARGETS

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

runtime.python_binary(
    name = "xnnpack_lowering_examples",
    main_src = "xnnpack_lowering_examples.py",
    deps = [
        "//caffe2:torch",
        "//executorch/backends/xnnpack:xnnpack_preprocess",
        "//executorch/backends/xnnpack/partition:xnnpack_partitioner",
        "//executorch/examples/models:models",
        "//executorch/exir/backend:backend_api",
    ],
)
examples/backend/xnnpack_lowering_examples.py

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Example script for exporting simple models to flatbuffer

import argparse
import copy

import executorch.exir as exir
import torch._export as export
from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
    XnnpackFloatingPointPartitioner,
    XnnpackQuantizedPartitioner2,
)
from executorch.exir.backend.backend_api import to_backend, validation_disabled

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    get_symmetric_quantization_config,
    XNNPACKQuantizer,
)

from ..models import MODEL_NAME_TO_MODEL

# Note: for mv3, the mul op is not supported in XNNPACKQuantizer, that could be supported soon
XNNPACK_MODEL_NAME_TO_MODEL = {
    name: MODEL_NAME_TO_MODEL[name] for name in ["linear", "add", "add_mul", "mv2"]
}


def quantize(model, example_inputs):
    """This is the official recommended flow for quantization in pytorch 2.0 export"""
    m = model.eval()
    m = export.capture_pre_autograd_graph(m, copy.deepcopy(example_inputs))
    quantizer = XNNPACKQuantizer()
    # if we set is_per_channel to True, we also need to add out_variant of quantize_per_channel/dequantize_per_channel
    operator_config = get_symmetric_quantization_config(is_per_channel=False)
    quantizer.set_global(operator_config)
    m = prepare_pt2e(m, quantizer)
    # calibration
    m(*example_inputs)
    m = convert_pt2e(m)
    return m


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-m",
        "--model_name",
        required=True,
        help=f"Provide model name. Valid ones: {list(XNNPACK_MODEL_NAME_TO_MODEL.keys())}",
    )
    parser.add_argument(
        "-q",
        "--quantize",
        action="store_true",
        required=False,
        default=False,
        help="Flag for producing quantized or floating-point model",
    )
    args = parser.parse_args()

    if args.model_name not in XNNPACK_MODEL_NAME_TO_MODEL:
        raise RuntimeError(
            f"Model {args.model_name} is not a valid name. or not quantizable right now, "
            "please contact executorch team if you want to learn why or how to support "
            "quantization for the requested model"
            f"Available models are {list(XNNPACK_MODEL_NAME_TO_MODEL.keys())}."
        )

    model, example_inputs = MODEL_NAME_TO_MODEL[args.model_name]()
    model = model.eval()

    partitioner = XnnpackFloatingPointPartitioner
    if args.quantize:
        print("Quantizing Model...")
        model = quantize(model, example_inputs)
        # Partitioner will eventually be a single partitioner for both fp32 and quantized models
        partitioner = XnnpackQuantizedPartitioner2

    edge = exir.capture(
        model, example_inputs, exir.CaptureConfig(enable_aot=True, _unlift=True)
    ).to_edge(exir.EdgeCompileConfig(_check_ir_validity=False))
    print("Exported graph:\n", edge.exported_program.graph)

    with validation_disabled():
        edge.exported_program = to_backend(edge.exported_program, partitioner)
    print("Lowered graph:\n", edge.exported_program.graph)

    exec_prog = edge.to_executorch()
    buffer = exec_prog.buffer
    quant_tag = "_quantize" if args.quantize else ""
    filename = f"xnnpack_{args.model_name}{quant_tag}.pte"
    print(f"Saving exported program to {filename}.")
    with open(filename, "wb") as f:
        f.write(buffer)

examples/executor_runner/targets.bzl

Lines changed: 13 additions & 0 deletions
@@ -50,3 +50,16 @@ def define_common_targets():
         define_static_target = True,
         **get_oss_build_kwargs()
     )
+
+    # executor runner for XNNPACK Backend and portable kernels.
+    runtime.cxx_binary(
+        name = "xnn_executor_runner",
+        srcs = [],
+        deps = [
+            ":executor_runner_lib",
+            "//executorch/backends/xnnpack:xnnpack_backend",
+            "//executorch/kernels/portable:generated_lib_all_ops",
+        ] + custom_ops_lib,
+        define_static_target = True,
+        **get_oss_build_kwargs()
+    )
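
Presumably this new xnn_executor_runner is the intended consumer of the .pte files produced by the export script above: it reuses the shared executor_runner_lib front end but additionally links the XNNPACK backend and the full generated portable kernel library, so delegated subgraphs execute through XNNPACK while any non-delegated ops fall back to the portable kernels.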
