
Add model execution scripts and runner #5217

Closed
10 changes: 7 additions & 3 deletions backends/mediatek/CMakeLists.txt
@@ -25,9 +25,13 @@ include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/runtime/include)

# targets
add_library(neuron_backend SHARED)
-target_link_libraries(
-  neuron_backend PRIVATE executorch_no_prim_ops portable_ops_lib android log
-  ${NEURON_BUFFER_ALLOCATOR_LIB}
+target_link_libraries(neuron_backend
+  PRIVATE
+  executorch_no_prim_ops
+  portable_ops_lib
+  android
+  log
+  ${NEURON_BUFFER_ALLOCATOR_LIB}
)
target_sources(
  neuron_backend
38 changes: 38 additions & 0 deletions examples/mediatek/CMakeLists.txt
@@ -75,6 +75,44 @@ if(${ANDROID})
  )
  target_compile_options(mtk_executor_runner PUBLIC ${_common_compile_options})

  set(_mtk_oss_executor_runner__srcs ${_executor_runner__srcs})
  list(
    TRANSFORM
    _mtk_oss_executor_runner__srcs
    PREPEND
    "${EXECUTORCH_SOURCE_DIR}/"
  )
  list(
    FILTER
    _mtk_oss_executor_runner__srcs
    EXCLUDE REGEX
    ".*executor_runner.cpp$"
  )
  list(
    PREPEND
    _mtk_oss_executor_runner__srcs
    ${CMAKE_CURRENT_LIST_DIR}/executor_runner/mtk_oss_executor_runner.cpp
  )

  add_executable(mtk_oss_executor_runner ${_mtk_oss_executor_runner__srcs})

  target_include_directories(mtk_oss_executor_runner
    PUBLIC
      ${_common_include_directories}
      ${EXECUTORCH_ROOT}/cmake-android-out/third-party/gflags/include
  )

  target_link_libraries(mtk_oss_executor_runner
    ${_executor_runner_libs}
    executorch
    neuron_backend
    gflags
  )
  target_compile_options(mtk_oss_executor_runner
    PUBLIC
    ${_common_compile_options}
  )

  set(_mtk_llama_executor_runner__srcs ${_mtk_executor_runner__srcs})
  list(FILTER _mtk_llama_executor_runner__srcs EXCLUDE REGEX
    ".*executor_runner.cpp$"
36 changes: 36 additions & 0 deletions examples/mediatek/README.md
@@ -9,6 +9,8 @@ examples/mediatek
├── preformatter_templates # Model-specific prompt preformatter templates
├── prompts # Calibration Prompts
├── tokenizers_ # Model tokenizer scripts
├── oss_utils # Utils for oss models
├── eval_utils # Utils for evaluating oss model outputs
├── model_export_scripts # Model-specific export scripts
├── models # Model definitions
├── llm_models # LLM model definitions
@@ -44,6 +46,7 @@ pip3 install mtk_converter-8.8.0.dev20240723+public.d1467db9-cp310-cp310-manylin
```

## AoT Flow
### llama
##### Note: Verify that a localhost connection is available before running the AoT Flow
1. Exporting Models to `.pte`
- In the `examples/mediatek` directory, run:
@@ -72,6 +75,14 @@ source shell_scripts/export_llama.sh <model_name> <num_chunks> <prompt_num_token
- e.g. For `llama3-8B-instruct`, the embedding bin is generated in `examples/mediatek/models/llm_models/weights/llama3-8B-instruct/`
- The AoT flow takes roughly 2.5 hours (114GB RAM for `num_chunks=4`) to complete (results will vary by device/hardware configuration)

### oss
1. Exporting Model to `.pte`
```bash
bash shell_scripts/export_oss.sh <model_name>
```
- Argument Options:
  - `model_name`: deeplabv3/edsr/inceptionv3/inceptionv4/mobilenetv2/mobilenetv3/resnet18/resnet50
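
For example, to export `mobilenetv2` (any model name from the options above works the same way; the resulting `.pte` is what gets pushed to the device in the Runtime section below):
```bash
# Export the oss MobileNetV2 model to a .pte for the MediaTek backend.
bash shell_scripts/export_oss.sh mobilenetv2
```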

# Runtime
## Supported Chips

@@ -100,6 +111,13 @@ adb push <MODEL_NAME>.pte <PHONE_PATH, e.g. /data/local/tmp>

Make sure to replace `<MODEL_NAME>` with the actual name of your model file, and replace `<PHONE_PATH>` with the desired destination on the device.

##### Note: For oss models, please push these additional files to your Android device:
```bash
adb push mtk_oss_executor_runner <PHONE_PATH, e.g. /data/local/tmp>
adb push input_list.txt <PHONE_PATH, e.g. /data/local/tmp>
for i in input*bin; do adb push "$i" <PHONE_PATH, e.g. /data/local/tmp>; done;
```
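
For example, using `/data/local/tmp` as the destination (this assumes `input_list.txt` and the `input*bin` files were produced alongside the exported model):
```bash
# Push the oss runner, the input manifest, and the raw input tensors.
adb push mtk_oss_executor_runner /data/local/tmp
adb push input_list.txt /data/local/tmp
for i in input*bin; do adb push "$i" /data/local/tmp; done
```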

### Executing the Model

Execute the model on your Android device by running:
@@ -111,3 +129,21 @@ adb shell "/data/local/tmp/mtk_executor_runner --model_path /data/local/tmp/<MOD
In the command above, replace `<MODEL_NAME>` with the name of your model file and `<ITER_TIMES>` with the desired number of iterations to run the model.

##### Note: For llama models, please use `mtk_llama_executor_runner`. See `examples/mediatek/executor_runner/run_llama3_sample.sh` for an example.
##### Note: For oss models, please use `mtk_oss_executor_runner`.
```bash
adb shell "/data/local/tmp/mtk_oss_executor_runner --model_path /data/local/tmp/<MODEL_NAME>.pte --input_list /data/local/tmp/input_list.txt --output_folder /data/local/tmp/output_<MODEL_NAME>"
adb pull "/data/local/tmp/output_<MODEL_NAME>" ./
```
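
For example, for an exported `mobilenetv2` model (paths follow the push step above):
```bash
# Run inference on the device, then pull the output folder back to the host.
adb shell "/data/local/tmp/mtk_oss_executor_runner --model_path /data/local/tmp/mobilenetv2.pte --input_list /data/local/tmp/input_list.txt --output_folder /data/local/tmp/output_mobilenetv2"
adb pull "/data/local/tmp/output_mobilenetv2" ./
```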

### Check oss results on PC
```bash
python3 eval_utils/eval_oss_result.py --eval_type <eval_type> --target_f <golden_folder> --output_f <prediction_folder>
```
For example:
```bash
python3 eval_utils/eval_oss_result.py --eval_type piq --target_f edsr --output_f output_edsr
```
- Argument Options:
  - `eval_type`: topk/piq/segmentation
  - `target_f`: folder containing the golden data files, named `golden_<data_idx>_0.bin`
  - `output_f`: folder containing the model output data files, named `output_<data_idx>_0.bin`
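
Likewise, a `topk` accuracy check for a classification model might look like the following; the folder names here are hypothetical, so point them at wherever your golden and output files actually live:
```bash
# Top-k comparison between golden outputs and on-device predictions.
python3 eval_utils/eval_oss_result.py --eval_type topk --target_f golden_mobilenetv2 --output_f output_mobilenetv2
```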
73 changes: 73 additions & 0 deletions examples/mediatek/aot_utils/oss_utils/utils.py
@@ -0,0 +1,73 @@
# Copyright (c) MediaTek Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import os
from typing import Optional

import torch
from executorch import exir
from executorch.backends.mediatek import (
    NeuropilotPartitioner,
    NeuropilotQuantizer,
    Precision,
)
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e


def build_executorch_binary(
    model,
    inputs,
    file_name,
    dataset,
    quant_dtype: Optional[Precision] = None,
):
    if quant_dtype is not None:
        # Validate the requested precision before configuring the quantizer.
        if quant_dtype not in Precision:
            raise AssertionError(f"No support for Precision {quant_dtype}.")
        quantizer = NeuropilotQuantizer()
        quantizer.setup_precision(quant_dtype)

        captured_model = torch._export.capture_pre_autograd_graph(model, inputs)
        annotated_model = prepare_pt2e(captured_model, quantizer)
        print("Quantizing the model...")
        # Calibrate by running the representative dataset through the model.
        for data in dataset:
            annotated_model(*data)
        quantized_model = convert_pt2e(annotated_model, fold_quantize=False)
        aten_dialect = torch.export.export(quantized_model, inputs)
    else:
        aten_dialect = torch.export.export(model, inputs)

    from executorch.exir.program._program import to_edge_transform_and_lower

    edge_compile_config = exir.EdgeCompileConfig(_check_ir_validity=False)
    # The skipped op names below are used for the deeplabV3 model; they should
    # match no nodes, and so have no effect, for the other oss models.
    neuro_partitioner = NeuropilotPartitioner(
        [],
        op_names_to_skip={
            "aten_convolution_default_106",
            "aten_convolution_default_107",
        },
    )
    edge_prog = to_edge_transform_and_lower(
        aten_dialect,
        compile_config=edge_compile_config,
        partitioner=[neuro_partitioner],
    )

    exec_prog = edge_prog.to_executorch(
        config=exir.ExecutorchBackendConfig(extract_constant_segment=False)
    )
    with open(f"{file_name}.pte", "wb") as file:
        file.write(exec_prog.buffer)


def make_output_dir(path: str):
    # Recreate `path` as an empty directory, removing any previous contents.
    if os.path.exists(path):
        for f in os.listdir(path):
            os.remove(os.path.join(path, f))
        os.removedirs(path)
    os.makedirs(path)