
Add model execution scripts and runner #5217

Closed
10 changes: 7 additions & 3 deletions backends/mediatek/CMakeLists.txt
@@ -25,9 +25,13 @@ include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/runtime/include)

# targets
add_library(neuron_backend SHARED)
-target_link_libraries(
-  neuron_backend PRIVATE executorch_no_prim_ops portable_ops_lib android log
-  ${NEURON_BUFFER_ALLOCATOR_LIB}
+target_link_libraries(neuron_backend
+  PRIVATE
+  executorch_no_prim_ops
+  portable_ops_lib
+  android
+  log
+  ${NEURON_BUFFER_ALLOCATOR_LIB}
)
target_sources(
  neuron_backend
38 changes: 38 additions & 0 deletions examples/mediatek/CMakeLists.txt
@@ -75,6 +75,44 @@ if(${ANDROID})
  )
  target_compile_options(mtk_executor_runner PUBLIC ${_common_compile_options})

  set(_mtk_oss_executor_runner__srcs ${_executor_runner__srcs})
  list(
    TRANSFORM
    _mtk_oss_executor_runner__srcs
    PREPEND
    "${EXECUTORCH_SOURCE_DIR}/"
  )
  list(
    FILTER
    _mtk_oss_executor_runner__srcs
    EXCLUDE REGEX
    ".*executor_runner.cpp$"
  )
  list(
    PREPEND
    _mtk_oss_executor_runner__srcs
    ${CMAKE_CURRENT_LIST_DIR}/executor_runner/mtk_oss_executor_runner.cpp
  )

  add_executable(mtk_oss_executor_runner ${_mtk_oss_executor_runner__srcs})

  target_include_directories(mtk_oss_executor_runner
    PUBLIC
      ${_common_include_directories}
      ${EXECUTORCH_ROOT}/cmake-android-out/third-party/gflags/include
  )

  target_link_libraries(mtk_oss_executor_runner
    ${_executor_runner_libs}
    executorch
    neuron_backend
    gflags
  )
  target_compile_options(mtk_oss_executor_runner
    PUBLIC
    ${_common_compile_options}
  )

  set(_mtk_llama_executor_runner__srcs ${_mtk_executor_runner__srcs})
  list(FILTER _mtk_llama_executor_runner__srcs EXCLUDE REGEX
    ".*executor_runner.cpp$"
36 changes: 36 additions & 0 deletions examples/mediatek/README.md
@@ -9,6 +9,8 @@ examples/mediatek
├── preformatter_templates # Model-specific prompt preformatter templates
├── prompts # Calibration Prompts
├── tokenizers_ # Model tokenizer scripts
├── oss_utils # Utils for oss models
├── eval_utils # Utils for evaluating oss model outputs
├── model_export_scripts # Model-specific export scripts
├── models # Model definitions
├── llm_models # LLM model definitions
@@ -44,6 +46,7 @@ pip3 install mtk_converter-8.8.0.dev20240723+public.d1467db9-cp310-cp310-manylin
```

## AoT Flow
### llama
##### Note: Verify that a localhost connection is available before running the AoT Flow
1. Exporting Models to `.pte`
- In the `examples/mediatek` directory, run:
@@ -72,6 +75,14 @@ source shell_scripts/export_llama.sh <model_name> <num_chunks> <prompt_num_token
- e.g. For `llama3-8B-instruct`, the embedding bin is generated in `examples/mediatek/models/llm_models/weights/llama3-8B-instruct/`
- The AoT flow takes roughly 2.5 hours (114GB RAM for `num_chunks=4`) to complete (results will vary by device/hardware configuration)

### oss
1. Exporting Model to `.pte`
```bash
bash shell_scripts/export_oss.sh <model_name>
```
- Argument Options:
  - `model_name`: deeplabv3/edsr/inceptionv3/inceptionv4/mobilenetv2/mobilenetv3/resnet18/resnet50
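
For example, to export `mobilenetv2` (any model name from the options above works the same way; the resulting `.pte` is what gets pushed to the device in the Runtime section below):
```bash
# Export the oss MobileNetV2 model to a .pte for the MediaTek backend.
bash shell_scripts/export_oss.sh mobilenetv2
```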

# Runtime
## Supported Chips

@@ -100,6 +111,13 @@ adb push <MODEL_NAME>.pte <PHONE_PATH, e.g. /data/local/tmp>

Make sure to replace `<MODEL_NAME>` with the actual name of your model file, and replace `<PHONE_PATH>` with the desired destination on the device.

##### Note: For oss models, please push these additional files to your Android device:
```bash
adb push mtk_oss_executor_runner <PHONE_PATH, e.g. /data/local/tmp>
adb push input_list.txt <PHONE_PATH, e.g. /data/local/tmp>
for i in input*bin; do adb push "$i" <PHONE_PATH, e.g. /data/local/tmp>; done;
```
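
For example, using `/data/local/tmp` as the destination (this assumes `input_list.txt` and the `input*bin` files were produced alongside the exported model):
```bash
# Push the oss runner, the input manifest, and the raw input tensors.
adb push mtk_oss_executor_runner /data/local/tmp
adb push input_list.txt /data/local/tmp
for i in input*bin; do adb push "$i" /data/local/tmp; done
```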

### Executing the Model

Execute the model on your Android device by running:
@@ -111,3 +129,21 @@ adb shell "/data/local/tmp/mtk_executor_runner --model_path /data/local/tmp/<MOD
In the command above, replace `<MODEL_NAME>` with the name of your model file and `<ITER_TIMES>` with the desired number of iterations to run the model.

##### Note: For llama models, please use `mtk_llama_executor_runner`. See `examples/mediatek/executor_runner/run_llama3_sample.sh` for an example.
##### Note: For oss models, please use `mtk_oss_executor_runner`.
```bash
adb shell "/data/local/tmp/mtk_oss_executor_runner --model_path /data/local/tmp/<MODEL_NAME>.pte --input_list /data/local/tmp/input_list.txt --output_folder /data/local/tmp/output_<MODEL_NAME>"
adb pull "/data/local/tmp/output_<MODEL_NAME>" ./
```
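
For example, for an exported `mobilenetv2` model (paths follow the push step above):
```bash
# Run inference on the device, then pull the output folder back to the host.
adb shell "/data/local/tmp/mtk_oss_executor_runner --model_path /data/local/tmp/mobilenetv2.pte --input_list /data/local/tmp/input_list.txt --output_folder /data/local/tmp/output_mobilenetv2"
adb pull "/data/local/tmp/output_mobilenetv2" ./
```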

### Check oss results on PC
```bash
python3 eval_utils/eval_oss_result.py --eval_type <eval_type> --target_f <golden_folder> --output_f <prediction_folder>
```
For example:
```bash
python3 eval_utils/eval_oss_result.py --eval_type piq --target_f edsr --output_f output_edsr
```
- Argument Options:
  - `eval_type`: topk/piq/segmentation
  - `target_f`: folder containing the golden data files, named `golden_<data_idx>_0.bin`
  - `output_f`: folder containing the model output data files, named `output_<data_idx>_0.bin`
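
Likewise, a `topk` accuracy check for a classification model might look like the following; the folder names here are hypothetical, so point them at wherever your golden and output files actually live:
```bash
# Top-k comparison between golden outputs and on-device predictions.
python3 eval_utils/eval_oss_result.py --eval_type topk --target_f golden_mobilenetv2 --output_f output_mobilenetv2
```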
73 changes: 73 additions & 0 deletions examples/mediatek/aot_utils/oss_utils/utils.py
@@ -0,0 +1,73 @@
# Copyright (c) MediaTek Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import os
from typing import Optional

import torch
from executorch import exir
from executorch.backends.mediatek import (
    NeuropilotPartitioner,
    NeuropilotQuantizer,
    Precision,
)
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e


def build_executorch_binary(
    model,
    inputs,
    file_name,
    dataset,
    quant_dtype: Optional[Precision] = None,
):
    if quant_dtype is not None:
        # Validate the requested precision before configuring the quantizer.
        if quant_dtype not in Precision:
            raise AssertionError(f"No support for Precision {quant_dtype}.")
        quantizer = NeuropilotQuantizer()
        quantizer.setup_precision(quant_dtype)

        captured_model = torch._export.capture_pre_autograd_graph(model, inputs)
        annotated_model = prepare_pt2e(captured_model, quantizer)
        print("Quantizing the model...")
        # Calibrate by running the representative dataset through the model.
        for data in dataset:
            annotated_model(*data)
        quantized_model = convert_pt2e(annotated_model, fold_quantize=False)
        aten_dialect = torch.export.export(quantized_model, inputs)
    else:
        aten_dialect = torch.export.export(model, inputs)

    from executorch.exir.program._program import to_edge_transform_and_lower

    edge_compile_config = exir.EdgeCompileConfig(_check_ir_validity=False)
    # The skipped op names below are used for the deeplabV3 model; they should
    # match no nodes, and so have no effect, for the other oss models.
    neuro_partitioner = NeuropilotPartitioner(
        [],
        op_names_to_skip={
            "aten_convolution_default_106",
            "aten_convolution_default_107",
        },
    )
    edge_prog = to_edge_transform_and_lower(
        aten_dialect,
        compile_config=edge_compile_config,
        partitioner=[neuro_partitioner],
    )

    exec_prog = edge_prog.to_executorch(
        config=exir.ExecutorchBackendConfig(extract_constant_segment=False)
    )
    with open(f"{file_name}.pte", "wb") as file:
        file.write(exec_prog.buffer)


def make_output_dir(path: str):
    # Recreate `path` as an empty directory, removing any previous contents.
    if os.path.exists(path):
        for f in os.listdir(path):
            os.remove(os.path.join(path, f))
        os.removedirs(path)
    os.makedirs(path)