Restructuring demos - runtime (#531)

guangy10 · facebook-github-bot · commit 889d9905a9a4 · 2023-09-29T20:23:48.000-07:00
Summary:

## Proposed code structure (dir only):

```
executorch/examples/
├── README.md # top-level description for demos and folder structures
├── models
│   ├── deeplab_v3
│   | ...
│   └── wav2letter
├── quantization
│   ├── toy_quantizer
│   └── xnnpack
├── backend
│   ├── toy_backend
│   └── xnnpack
├── export
│   ├── portable
│   └── xnnpack
├── recipes # AOT + runtime to create a demo experience. Where description (README.md) for each demo should go
│   ├── arm_tosa_delegate
│   ├── portable_mode
│   ├── toy_quantize_and_delegate # for vendor
│   └── xnnpack
├── runtime
│   ├── bundled
│   │   └── bundled_executor_runner
│   ├── portable
│   │   └── executor_runner
│   └── xnnpack
│       └── xnn_executor_runner
├── third-party
├── ios_demo_apps
├── android_demo_apps
├── custom_ops # non-e2e demo
└── selective_build # non-e2e demo
```

Reference to old code structure: https://github.com/pytorch/executorch/tree/main/examples

## This Diff

Focus on restructuring runtime examples:

```
executorch/examples/runtime/
├── bundled
│   ├── BUCK
│   ├── bundled_executor_runner.cpp
│   ├── TARGETS
│   └── targets.bzl
├── portable
│   ├── BUCK
│   ├── executor_runner.cpp
│   ├── TARGETS
│   └── targets.bzl
└── xnnpack
    ├── TARGETS
    └── targets.bzl
```

NOTE: Please ignore CI failures, build files and test files, and focus only on discussing whether the proposed code structure will provide a better user experience.

Differential Revision: D49714823
diff --git a/.ci/scripts/test.sh b/.ci/scripts/test.sh
@@ -57,7 +57,7 @@ test_model() {
 
   # Run test model
   if [[ "${BUILD_TOOL}" == "buck2" ]]; then
-    buck2 run //examples/executor_runner:executor_runner -- --model_path "./${MODEL_NAME}.pte"
+    buck2 run //examples/runtime/executor_runner:executor_runner -- --model_path "./${MODEL_NAME}.pte"
   elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
     if [[ ! -f ${CMAKE_OUTPUT_DIR}/executor_runner ]]; then
       build_cmake_executor_runner
@@ -109,7 +109,7 @@ test_model_with_xnnpack() {
 
   # Run test model
   if [[ "${BUILD_TOOL}" == "buck2" ]]; then
-    buck2 run //examples/backend:xnn_executor_runner -- --model_path "${OUTPUT_MODEL_PATH}"
+    buck2 run //examples/runtime/xnnpack:xnn_executor_runner -- --model_path "${OUTPUT_MODEL_PATH}"
   elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
     if [[ ! -f ${CMAKE_OUTPUT_DIR}/backends/xnnpack/xnn_executor_runner ]]; then
       build_cmake_xnn_executor_runner
@@ -129,9 +129,9 @@ test_demo_backend_delegation() {
 
   # Run test model
   if [[ "${BUILD_TOOL}" == "buck2" ]]; then
-    buck2 run //examples/executor_runner:executor_runner -- --model_path "./composite_model.pte"
-    buck2 run //examples/executor_runner:executor_runner -- --model_path "./partition_lowered_model.pte"
-    buck2 run //examples/executor_runner:executor_runner -- --model_path "./whole.pte"
+    buck2 run //examples/runtime/executor_runner:executor_runner -- --model_path "./composite_model.pte"
+    buck2 run //examples/runtime/executor_runner:executor_runner -- --model_path "./partition_lowered_model.pte"
+    buck2 run //examples/runtime/executor_runner:executor_runner -- --model_path "./whole.pte"
   elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
     if [[ ! -f ${CMAKE_OUTPUT_DIR}/executor_runner ]]; then
       build_cmake_executor_runner
diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh
@@ -70,7 +70,7 @@ install_flatc_from_source() {
 
 build_executorch_runner_buck2() {
   # Build executorch runtime with retry as this step is flaky on macos CI
-  retry buck2 build //examples/executor_runner:executor_runner
+  retry buck2 build //examples/runtime/executor_runner:executor_runner
 }
 
 build_executorch_runner_cmake() {
diff --git a/build/cmake_deps.toml b/build/cmake_deps.toml
@@ -13,7 +13,7 @@ excludes = [
 
 [targets.executor_runner]
 buck_targets = [
-  "//examples/executor_runner:executor_runner",
+  "//examples/runtime/portable:executor_runner",
 ]
 filters = [
   ".cpp$",
@@ -93,7 +93,7 @@ deps = [
 
 [targets.xnn_executor_runner]
 buck_targets = [
-  "//examples/backend:xnn_executor_runner",
+  "//examples/runtime/xnnpack:xnn_executor_runner",
 ]
 filters = [
   ".cpp$",
diff --git a/docs/source/getting-started-setup.md b/docs/source/getting-started-setup.md
@@ -193,7 +193,7 @@ Complete the following steps:
 
 3. Build a binary:
    ```bash
-   /tmp/buck2 build //examples/executor_runner:executor_runner --show-output
+   /tmp/buck2 build //examples/runtime/portable:executor_runner --show-output
    ```
 
    :::{dropdown} Output
@@ -227,7 +227,7 @@ the `buck run` command to run our program.
    * To run the `add.pte` program:
 
      ```bash
-     /tmp/buck2 run //examples/executor_runner:executor_runner -- --model_path add.pte
+     /tmp/buck2 run //examples/runtime/portable:executor_runner -- --model_path add.pte
      ```
 
      :::{dropdown} Sample Output
diff --git a/docs/website/docs/tutorials/00_setting_up_executorch.md b/docs/website/docs/tutorials/00_setting_up_executorch.md
@@ -98,7 +98,7 @@ You may want to copy the `buck2` binary into your `$PATH` so you can run it as `
 `executor_runner` is an example wrapper around executorch runtime which includes all the operators and backends
 
 ```bash
-/tmp/buck2 build //examples/executor_runner:executor_runner --show-output
+/tmp/buck2 build //examples/runtime/portable:executor_runner --show-output
 ```
 
 The `--show-output` flag will print the path to the executable if you want to run it directly.
@@ -112,10 +112,10 @@ conda install -c conda-forge lld
 
 ```bash
 # add.pte is the program generated from export_example.py during AOT Setup Step 3
-/tmp/buck2 run //examples/executor_runner:executor_runner -- --model_path add.pte
+/tmp/buck2 run //examples/runtime/portable:executor_runner -- --model_path add.pte
 
 # To run a delegated model
-/tmp/buck2 run //examples/executor_runner:executor_runner -- --model_path composite_model.pte
+/tmp/buck2 run //examples/runtime/portable:executor_runner -- --model_path composite_model.pte
 ```
 
 or execute the binary directly from the `--show-output` path shown when building.
diff --git a/examples/README.md b/examples/README.md
@@ -1,35 +1,35 @@
 # Examples
 
-This dir contains scripts and other helper utilities to illustrate an end-to-end workflow to run a torch.nn.module on the ExecuTorch runtime.
+This dir contains scripts and other helper utilities to illustrate an end-to-end workflow to run a torch.nn.module on the Executorch runtime.
 It also includes a list of modules, from a simple `Add` to a full model like `MobileNetv2` and `MobileNetv3`, with more to come.
 
 
 ## Directory structure
 ```bash
 examples
 ├── backend                           # Contains examples for exporting delegate models and running them using custom executor runners
-├── custom_ops                        # Contains examples to register custom operators into PyTorch as well as register its kernels into ExecuTorch runtime
+├── custom_ops                        # Contains examples to register custom operators into PyTorch as well as register its kernels into Executorch runtime
 ├── example_quantizer_and_delegate    # Contains examples to fully lower a MobileNetV2 model to the example backend with an example quantizer
-├── executor_runner                   # This is an example C++ wrapper around the ET runtime
 ├── export                            # Python helper scripts to illustrate export workflow
 ├── ios_demo_apps                     # Contains iOS demo apps
-├── models                            # Contains a set of simple to PyTorch models
+├── models                            # Contains a set of out-of-box PyTorch models
 ├── quantization                      # Contains examples of quantization workflow
-├── arm                               # Contains examples of the Arm TOSA and Ethos-U NPU flows
+├── recipes                           # Contains recipes for a set of demos
+├── runtime                           # Contains examples of C++ wrapper around the ET runtime
 └── README.md                         # This file
 ```
 
 ## Using the examples
 
 We will walk through an example model to generate a binary file from a python torch.nn.module
 from the `models` dir using scripts from the `export` dir. Then we will run on these binary
-model files on the ExecuTorch (ET) runtime. For that we will use `executor_runner`. It is a simple
-wrapper for the ExecuTorch runtime to serve as an example. Although simple, it is capable of loading
+model files on the Executorch (ET) runtime. For that we will use `executor_runner`. It is a simple
+wrapper for the Executorch runtime to serve as an example. Although simple, it is capable of loading
 and executing previously exported binary file(s).
 
 
 1. Following the setup guide in [Setting up ExecuTorch from GitHub](/docs/website/docs/tutorials/00_setting_up_executorch.md)
-you should be able to get the basic development environment for ExecuTorch working.
+you should be able to get the basic development environment for Executorch working.
 
 2. Using the script `export/export_example.py` generate a model binary file by selecting a
 model name from the list of available models in the `models` dir.
@@ -49,10 +49,10 @@ python3 -m examples.export.export_example --model_name="mv2" # for MobileNetv2
 
 Use `-h` (or `--help`) to see all the supported models.
 
-3. Once we have the model binary (pte) file, then let's run it with ExecuTorch runtime using the `executor_runner`.
+3. Once we have the model binary (pte) file, then let's run it with Executorch runtime using the `executor_runner`.
 
 ```bash
-buck2 run examples/executor_runner:executor_runner -- --model_path mv2.pte
+buck2 run examples/runtime/portable:executor_runner -- --model_path mv2.pte
 ```
 
 ## Quantization
@@ -87,7 +87,7 @@ buck2 run executorch/examples/quantization:example -- --help
 Quantized model can be run via executor_runner, similar to floating point model, via, as shown above:
 
 ```bash
-buck2 run examples/executor_runner:executor_runner -- --model_path mv2.pte
+buck2 run examples/runtime/portable:executor_runner -- --model_path mv2.pte
 ```
 
 Note that, running quantized model, requires various quantized/dequantize operators, available in [quantized kernel lib](/kernels/quantized).
diff --git a/examples/backend/README.md b/examples/backend/README.md
@@ -13,10 +13,10 @@ The following command will produce an floating-point XNNPACK delegated model `mv
 python3 -m examples.backend.xnnpack_examples --model_name="mv2" --delegate
 ```
 
-Once we have the model binary (pte) file, then let's run it with ExecuTorch runtime using the `xnn_executor_runner`.
+Once we have the model binary (pte) file, then let's run it with Executorch runtime using the `xnn_executor_runner`.
 
 ```bash
-buck2 run examples/backend:xnn_executor_runner -- --model_path ./mv2_xnnpack_fp32.pte
+buck2 run examples/runtime/xnnpack:xnn_executor_runner -- --model_path ./mv2_xnnpack_fp32.pte
 ```
 
 ## XNNPACK quantization + delegation
@@ -26,10 +26,10 @@ The following command will produce an XNNPACK quantized and delegated model `mv2
 python3 -m examples.backend.xnnpack_examples --model_name="mv2" --quantize --delegate
 ```
 
-Once we have the model binary (pte) file, then let's run it with ExecuTorch runtime using the `xnn_executor_runner`.
+Once we have the model binary (pte) file, then let's run it with Executorch runtime using the `xnn_executor_runner`.
 
 ```bash
-buck2 run examples/backend:xnn_executor_runner -- --model_path ./mv2_xnnpack_q8.pte
+buck2 run examples/runtime/xnnpack:xnn_executor_runner -- --model_path ./mv2_xnnpack_q8.pte
 ```
 
 ## XNNPACK performance gain
@@ -40,14 +40,14 @@ We tested the performance for MobileNet V2 and MobileNet V3 on Linux x86 and Mac
 
 For each model, we export three variations: portable (without any optimization), xnnpack fp32 (exported for XNNPACK delegation without quantization), xnnpack q8 (exported for XNNPACK delegation with qint8 delegation).
 
-We build the benchmarking binary (will be released in the near future, but it is similar to `examples/backend:xnn_executor_runner`). Benchmarking binary, by default, runs 10 iterations of warmup and 50 iterations of benchmarking. Number reported here are average measured latency, in ms, across 50 runs. The first iteration is slower due to warm up, and the performance is is stable on subsequent iterations, so we also report the execution time for the first iteration for reference. Below is the model execution time for first iteration and subsequent iterations (average after warmup), in milliseconds. We use a single thread to test the models. Details about the methodology and repro steps are below the tables.
+We build the benchmarking binary (will be released in the near future, but it is similar to `examples/runtime/xnnpack:xnn_executor_runner`). Benchmarking binary, by default, runs 10 iterations of warmup and 50 iterations of benchmarking. Numbers reported here are average measured latency, in ms, across 50 runs. The first iteration is slower due to warm up, and the performance is stable on subsequent iterations, so we also report the execution time for the first iteration for reference. Below is the model execution time for first iteration and subsequent iterations (average after warmup), in milliseconds. We use a single thread to test the models. Details about the methodology and repro steps are below the tables.
 
 ### Methodology
 
-Models are exported with the steps above for XNNPACK delegation, and with `examples/export:export_example` for portable backend without any optimization. Then use `//examples/backend:xnn_executor_runner` with profiler (command listed below); or  in the future, use the runtime in `//sdk/runners:executor_runner` since it gives more options such as number of iterations after build rules for OSS is added.
+Models are exported with the steps above for XNNPACK delegation, and with `examples/export:export_example` for portable backend without any optimization. Then use `//examples/runtime/xnnpack:xnn_executor_runner` with profiler (command listed below); or  in the future, use the runtime in `//sdk/runners:executor_runner` since it gives more options such as number of iterations after build rules for OSS is added.
 
 ```
-buck run -c executorch.prof_enabled=true -c executorch.prof_buf_size=8096 -c executorch.num_prof_blocks=61 //examples/backend:xnn_executor_runner -- --model_path mv3.pte
+buck run -c executorch.prof_enabled=true -c executorch.prof_buf_size=8096 -c executorch.num_prof_blocks=61 //examples/runtime/xnnpack:xnn_executor_runner -- --model_path mv3.pte
 ```
 
 A rough number of execution time can be obtained via the log timestamp. The profiler result can be analyzed with `profiler:profiler_results_cli`.
diff --git a/examples/backend/targets.bzl b/examples/backend/targets.bzl
@@ -29,16 +29,3 @@ def define_common_targets():
             "//executorch/exir/backend:backend_api",
         ],
     )
-
-    # executor_runner for XNNPACK Backend and portable kernels.
-    runtime.cxx_binary(
-        name = "xnn_executor_runner",
-        srcs = [],
-        deps = [
-            "//executorch/examples/executor_runner:executor_runner_lib",
-            "//executorch/backends/xnnpack:xnnpack_backend",
-            "//executorch/kernels/portable:generated_lib_all_ops",
-        ],
-        define_static_target = True,
-        **get_oss_build_kwargs()
-    )
diff --git a/examples/custom_ops/test_custom_ops.sh b/examples/custom_ops/test_custom_ops.sh
@@ -21,7 +21,7 @@ test_buck2_custom_op_1() {
   # should save file custom_ops_1.pte
 
   echo 'Running executor_runner'
-  buck2 run //examples/executor_runner:executor_runner \
+  buck2 run //examples/runtime/executor_runner:executor_runner \
       --config=executorch.register_custom_op=1 -- --model_path="./${model_name}.pte"
   # should give correct result
 
@@ -58,7 +58,7 @@ test_buck2_custom_op_2() {
   ${PYTHON_EXECUTABLE} -m "examples.custom_ops.${model_name}" --so_library="$SO_LIB"
   # should save file custom_ops_2.pte
 
-  buck2 run //examples/executor_runner:executor_runner \
+  buck2 run //examples/runtime/executor_runner:executor_runner \
       --config=executorch.register_custom_op=2 -- --model_path="./${model_name}.pte"
   # should give correct result
   echo "Removing ${model_name}.pte"
diff --git a/examples/quantization/test_quantize.sh b/examples/quantization/test_quantize.sh
@@ -35,7 +35,7 @@ test_buck2_quantization() {
   ${PYTHON_EXECUTABLE} -m "examples.quantization.example" --so_library="$SO_LIB" --model_name="$1"
 
   echo 'Running executor_runner'
-  $BUCK run //examples/executor_runner:executor_runner -- --model_path="./${1}_quantized.pte"
+  $BUCK run //examples/runtime/executor_runner:executor_runner -- --model_path="./${1}_quantized.pte"
   # should give correct result
 
   echo "Removing ${1}_quantized.pte"
diff --git a/examples/runtime/bundled/TARGETS b/examples/runtime/bundled/TARGETS
diff --git a/examples/runtime/bundled/bundled_executor_runner.cpp b/examples/runtime/bundled/bundled_executor_runner.cpp
diff --git a/examples/runtime/bundled/targets.bzl b/examples/runtime/bundled/targets.bzl
diff --git a/examples/runtime/portable/TARGETS b/examples/runtime/portable/TARGETS
diff --git a/examples/runtime/portable/executor_runner.cpp b/examples/runtime/portable/executor_runner.cpp
diff --git a/examples/runtime/portable/targets.bzl b/examples/runtime/portable/targets.bzl
diff --git a/examples/runtime/xnnpack/TARGETS b/examples/runtime/xnnpack/TARGETS
@@ -0,0 +1,8 @@
+# Any targets that should be shared between fbcode and xplat must be defined in
+# targets.bzl. This file can contain fbcode-only targets.
+
+load(":targets.bzl", "define_common_targets")
+
+oncall("executorch")
+
+define_common_targets()
diff --git a/examples/runtime/xnnpack/targets.bzl b/examples/runtime/xnnpack/targets.bzl
@@ -0,0 +1,21 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_oss_build_kwargs", "runtime")
+
+def define_common_targets():
+    """Defines targets that should be shared between fbcode and xplat.
+
+    The directory containing this targets.bzl file should also contain both
+    TARGETS and BUCK files that call this function.
+    """
+
+    # executor_runner for XNNPACK Backend and portable kernels.
+    runtime.cxx_binary(
+        name = "xnn_executor_runner",
+        srcs = [],
+        deps = [
+            "//executorch/examples/runtime/portable:executor_runner_lib",
+            "//executorch/backends/xnnpack:xnnpack_backend",
+            "//executorch/kernels/portable:generated_lib_all_ops",
+        ],
+        define_static_target = True,
+        **get_oss_build_kwargs()
+    )
diff --git a/examples/selective_build/targets.bzl b/examples/selective_build/targets.bzl
@@ -73,7 +73,7 @@ def define_common_targets():
         name = "selective_build_test",
         srcs = [],
         deps = [
-            "//executorch/examples/executor_runner:executor_runner_lib",
+            "//executorch/examples/runtime/executor_runner:executor_runner_lib",
         ] + lib,
         define_static_target = True,
         **get_oss_build_kwargs()

Original file line number	Diff line number	Diff line change
`@@ -70,7 +70,7 @@ install_flatc_from_source() {`
`70`	`70`
`71`	`71`	`build_executorch_runner_buck2() {`
`72`	`72`	`# Build executorch runtime with retry as this step is flaky on macos CI`
`73`		`- retry buck2 build //examples/executor_runner:executor_runner`
	`73`	`+ retry buck2 build //examples/runtime/executor_runner:executor_runner`
`74`	`74`	`}`
`75`	`75`
`76`	`76`	`build_executorch_runner_cmake() {`