
Fix xnnpack demo #122

Closed
wants to merge 1 commit into from
22 changes: 0 additions & 22 deletions examples/backend/README

This file was deleted.

33 changes: 33 additions & 0 deletions examples/backend/README.md
@@ -0,0 +1,33 @@
This README gives some examples of backend-specific model workflows.

# XNNPACK Backend

[XNNPACK](https://github.com/google/XNNPACK) is a library of optimized neural network inference operators for ARM and x86 platforms. Our delegate lowers models to run using these highly optimized CPU operators. You can try lowering and running some example models with the following commands:

## XNNPACK delegation-only

The following command will produce a floating-point XNNPACK-delegated model `mv2_xnnpack_fp32.pte` that can be run using XNNPACK's operators. It will also print the lowered graph, showing which parts of the model have been lowered to XNNPACK via `executorch_call_delegate`.

```bash
# For MobileNet V2
python3 -m examples.backend.xnnpack_examples --model_name="mv2" --delegate
```

Once we have the model binary (`.pte`) file, we can run it with the ExecuTorch runtime using the `xnn_executor_runner`:

```bash
buck2 run examples/backend:xnn_executor_runner -- --model_path ./mv2_xnnpack_fp32.pte
```

## XNNPACK quantization + delegation
The following command will produce an XNNPACK quantized and delegated model `mv2_xnnpack_q8.pte` that can be run using XNNPACK's operators. It will also print the lowered graph, showing which parts of the model have been lowered to XNNPACK via `executorch_call_delegate`.

```bash
python3 -m examples.backend.xnnpack_examples --model_name="mv2" --quantize --delegate
```

Once we have the model binary (`.pte`) file, we can run it with the ExecuTorch runtime using the `xnn_executor_runner`:

```bash
buck2 run examples/backend:xnn_executor_runner -- --model_path ./mv2_xnnpack_q8.pte
```
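For reference, the command-line surface shared by both invocations above can be sketched with `argparse`. Only the flag names (`--model_name`, `--quantize`, `--delegate`) are taken from the README commands; the help text and parser structure are assumptions, not the actual `xnnpack_examples.py` implementation:

```python
import argparse


def build_parser() -> argparse.ArgumentParser:
    # Hypothetical sketch of the flags used by examples.backend.xnnpack_examples;
    # only the flag names come from the commands shown above.
    parser = argparse.ArgumentParser(prog="xnnpack_examples")
    parser.add_argument("--model_name", required=True, help="e.g. mv2")
    parser.add_argument("--quantize", action="store_true",
                        help="quantize the model before delegation")
    parser.add_argument("--delegate", action="store_true",
                        help="lower supported subgraphs to the XNNPACK backend")
    return parser


args = build_parser().parse_args(["--model_name=mv2", "--quantize", "--delegate"])
print(args.model_name)  # → mv2
```

Passing only `--delegate` (without `--quantize`) corresponds to the floating-point workflow in the previous section.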
17 changes: 3 additions & 14 deletions examples/backend/TARGETS
@@ -1,19 +1,8 @@
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
# Any targets that should be shared between fbcode and xplat must be defined in
# targets.bzl. This file can contain fbcode-only targets.

load(":targets.bzl", "define_common_targets")

oncall("executorch")

define_common_targets()

runtime.python_binary(
name = "xnnpack_examples",
main_src = "xnnpack_examples.py",
deps = [
"//caffe2:torch",
"//executorch/backends/xnnpack:xnnpack_preprocess",
"//executorch/backends/xnnpack/partition:xnnpack_partitioner",
"//executorch/examples/models:models",
"//executorch/examples/quantization:quant_utils",
"//executorch/exir/backend:backend_api",
],
)
24 changes: 23 additions & 1 deletion examples/backend/targets.bzl
@@ -7,7 +7,29 @@ def define_common_targets():
TARGETS and BUCK files that call this function.
"""

# executor runner for XNNPACK Backend and portable kernels.
runtime.python_binary(
name = "xnnpack_examples",
main_module = "executorch.examples.backend.xnnpack_examples",
deps = [
":xnnpack_examples_lib",
],
)

runtime.python_library(
name = "xnnpack_examples_lib",
srcs = [
"xnnpack_examples.py",
],
deps = [
"//executorch/backends/xnnpack/partition:xnnpack_partitioner",
"//executorch/examples/models:models",
"//executorch/examples/quantization:quant_utils",
"//executorch/exir:lib",
"//executorch/exir/backend:backend_api",
],
)

# executor_runner for XNNPACK Backend and portable kernels.
runtime.cxx_binary(
name = "xnn_executor_runner",
srcs = [],
3 changes: 2 additions & 1 deletion examples/backend/xnnpack_examples.py
@@ -85,7 +85,8 @@

exec_prog = edge.to_executorch()
buffer = exec_prog.buffer
quant_tag = "_quantize" if args.quantize else ""

quant_tag = "q8" if args.quantize else "fp32"
filename = f"{args.model_name}_xnnpack_{quant_tag}.pte"
logging.info(f"Saving exported program to {filename}.")
with open(filename, "wb") as f:
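The suffix change above (from `"_quantize"`/`""` to `"q8"`/`"fp32"`) is what makes the script emit the `mv2_xnnpack_fp32.pte` and `mv2_xnnpack_q8.pte` names used throughout the README. The naming logic can be sketched in isolation as:

```python
def output_filename(model_name: str, quantize: bool) -> str:
    # Mirrors the updated suffix logic in xnnpack_examples.py:
    # quantized models are tagged "q8", floating-point models "fp32".
    quant_tag = "q8" if quantize else "fp32"
    return f"{model_name}_xnnpack_{quant_tag}.pte"


print(output_filename("mv2", quantize=False))  # → mv2_xnnpack_fp32.pte
print(output_filename("mv2", quantize=True))   # → mv2_xnnpack_q8.pte
```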