
Commit 4c06907

Qualcomm AI Engine Direct - add cli tool for QNN artifacts (#4731)

Summary:
- cli tool for deploying precompiled model library / context binary onto the ExecuTorch runtime
- refactor & minor fixes

Resolved #4731

1 parent 6cb5726 commit 4c06907

File tree

6 files changed: +721 -5 lines changed

backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h

Lines changed: 1 addition & 1 deletion

```diff
@@ -171,7 +171,7 @@ class PyQnnTensorWrapper {
        return {enc_data, data.axis};
      }
      default:
-       QNN_EXECUTORCH_LOG_ERROR(
+       QNN_EXECUTORCH_LOG_WARN(
            "%s QNN_QUANTIZATION_ENCODING_UNDEFINED detected",
            GetName().c_str());
        break;
```

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 91 additions & 0 deletions

```diff
@@ -3,6 +3,7 @@
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+import io
 import json
 import subprocess
 import sys
@@ -1825,6 +1826,96 @@ def required_envs(self, conditions=None) -> bool:
             ]
         )
 
+    def test_utils_export(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            module = ContextBinaryExample()  # noqa: F405
+            generate_context_binary(
+                module=module,
+                inputs=module.example_inputs(),
+                quantized=True,
+                artifact_dir=tmp_dir,
+            )
+            ctx_path = f"{tmp_dir}/model_ctx.bin"
+            fpath = f"{self.executorch_root}/examples/qualcomm/qaihub_scripts/utils/export.py"
+
+            # do compilation
+            compile_cmds = [
+                "python",
+                fpath,
+                "compile",
+                "-a",
+                ctx_path,
+                "-m",
+                self.model,
+                "-l",
+                "False",
+                "-b",
+                self.build_folder,
+                "-o",
+                f"{tmp_dir}/output_pte",
+            ]
+            compile_process = subprocess.Popen(
+                compile_cmds, stdout=subprocess.DEVNULL, cwd=self.executorch_root
+            )
+            output_pte_dir = f"{tmp_dir}/output_pte/model_ctx"
+            compile_process.communicate()
+
+            # check artifacts are correctly generated
+            self.assertTrue(
+                all(
+                    [
+                        Path(output_pte_dir).exists(),
+                        Path(f"{output_pte_dir}/model_ctx.json").exists(),
+                        Path(f"{output_pte_dir}/model_ctx.svg").exists(),
+                    ]
+                )
+            )
+
+            # prepare input files
+            input_list, inputs = [], module.example_inputs()
+            for name, tensor in inputs.items():
+                tensor_path = f"{output_pte_dir}/{name}.pt"
+                torch.save(tensor, tensor_path)
+                input_list.append(tensor_path)
+
+            # do execution
+            output_data_dir = f"{tmp_dir}/output_data"
+            execute_cmds = [
+                "python",
+                fpath,
+                "execute",
+                "-p",
+                output_pte_dir,
+                "-i",
+                *input_list,
+                "-s",
+                self.device,
+                "-z",
+                "-b",
+                self.build_folder,
+                "-o",
+                output_data_dir,
+            ]
+            if self.host is not None:
+                execute_cmds.append(f"-H {self.host}")
+            execute_process = subprocess.Popen(execute_cmds, cwd=self.executorch_root)
+            execute_process.communicate()
+
+            # read outputs
+            with open(f"{output_pte_dir}/model_ctx.json", "r") as f:
+                graph_info = json.load(f)
+
+            device_output = []
+            for output in graph_info["outputs"]:
+                with open(f"{output_data_dir}/{output['name']}.pt", "rb") as f:
+                    buffer = io.BytesIO(f.read())
+                device_output.append(torch.load(buffer, weights_only=False))
+
+            # validate outputs
+            golden_output = module.forward(inputs["x"], inputs["y"])
+            self.atol, self.rtol = 1e-1, 1
+            self._assert_outputs_equal(golden_output, device_output)
+
     def test_llama2_7b(self):
         if not self.required_envs():
             self.skipTest("missing required envs")
```

backends/qualcomm/utils/utils.py

Lines changed: 4 additions & 2 deletions

```diff
@@ -232,7 +232,9 @@ def capture_program(
     return edge_ep
 
 
-def from_context_binary(ctx_path: str, op_name: str):
+def from_context_binary(
+    ctx_path: str, op_name: str, soc_model: QcomChipset = QcomChipset.SM8650
+):
     def implement_op(custom_op, op_name, outputs):
         @torch.library.impl(
             custom_op, str(op_name), dispatch_key="CompositeExplicitAutograd"
@@ -283,7 +285,7 @@ def build_tensor(tensors, dtype_map):
     # dummy compiler spec would be fine, since we're not compiling
     backend_options = generate_htp_compiler_spec(use_fp16=False)
     compiler_specs = generate_qnn_executorch_compiler_spec(
-        soc_model=QcomChipset.SM8650,
+        soc_model=soc_model,
         backend_options=backend_options,
         is_from_context_binary=True,
     )
```
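
A minimal usage sketch of the widened signature (import paths are assumptions from the repository layout; the diff itself only confirms the signature):

```python
# Sketch: targeting a caller-selected chipset via the new soc_model parameter.
from executorch.backends.qualcomm.utils.utils import from_context_binary

# QcomChipset's module is assumed here; import it from wherever the backend
# exposes it in your checkout.
from executorch.backends.qualcomm.serialization.qnn_compile_spec_schema import QcomChipset

bundle = from_context_binary(
    ctx_path="model_ctx.bin",      # path to a precompiled QNN context binary
    op_name="ctx_loader",          # hypothetical op name, for illustration only
    soc_model=QcomChipset.SM8650,  # previously hard-coded; now caller-selectable
)
```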
examples/qualcomm/qaihub_scripts/utils/README.md

Lines changed: 102 additions & 0 deletions
# CLI Tool for Compiling / Deploying Pre-Built QNN Artifacts

An easy-to-use tool for generating / executing a .pte program from pre-built model libraries / context binaries produced by Qualcomm AI Engine Direct. The tool is verified with this [host environment](../../../../docs/source/build-run-qualcomm-ai-engine-direct-backend.md#host-os).

## Description

This tool is aimed at users who want to leverage the ExecuTorch runtime framework with their existing artifacts generated by QNN. It makes it possible to produce a .pte program in a few steps.<br/>
If you are interested in well-known applications, [Qualcomm AI HUB](https://aihub.qualcomm.com/) is a great resource that provides plenty of optimized, state-of-the-art models ready for deployment. All of them can be downloaded in model library or context binary format.
* Model libraries (.so) generated by `qnn-model-lib-generator` | AI HUB, or context binaries (.bin) generated by `qnn-context-binary-generator` | AI HUB, can be fed to the tool directly:
    - To produce a .pte program:
        ```bash
        $ python export.py compile
        ```
    - To perform inference with the generated .pte program:
        ```bash
        $ python export.py execute
        ```
### Dependencies

* Register for Qualcomm AI HUB.
* Download the QNN SDK your favorite model was compiled with via this [link](https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk). The link will automatically download the latest version at this moment (users should be able to specify a version soon; please refer to [this](../../../../docs/source/build-run-qualcomm-ai-engine-direct-backend.md#software) for earlier releases).
### Target Model
26+
27+
* Consider using [virtual environment](https://app.aihub.qualcomm.com/docs/hub/getting_started.html) for AI HUB scripts to prevent package conflict against ExecuTorch. Please finish the [installation section](https://app.aihub.qualcomm.com/docs/hub/getting_started.html#installation) before proceeding following steps.
28+
* Take [QuickSRNetLarge-Quantized](https://aihub.qualcomm.com/models/quicksrnetlarge_quantized?searchTerm=quantized) as an example, please [install](https://huggingface.co/qualcomm/QuickSRNetLarge-Quantized#installation) package as instructed.
29+
* Create workspace and export pre-built model library:
30+
```bash
31+
mkdir $MY_WS && cd $MY_WS
32+
# target chipset is `SM8650`
33+
python -m qai_hub_models.models.quicksrnetlarge_quantized.export --target-runtime qnn --chipset qualcomm-snapdragon-8gen3
34+
```
35+
* The compiled model library will be located under `$MY_WS/build/quicksrnetlarge_quantized/quicksrnetlarge_quantized.so`. This model library maps to the artifacts generated by SDK tools mentioned in `Integration workflow` section on [Qualcomm AI Engine Direct document](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/overview.html).
36+
### Compiling Program

* Compile the .pte program:
    ```bash
    # `pip install pydot` if the package is missing
    # Note that device serial & hostname might not be required if the given artifact is in context binary format
    PYTHONPATH=$EXECUTORCH_ROOT/.. python $EXECUTORCH_ROOT/examples/qualcomm/qaihub_scripts/utils/export.py compile -a $MY_WS/build/quicksrnetlarge_quantized/quicksrnetlarge_quantized.so -m SM8650 -s $DEVICE_SERIAL -b $EXECUTORCH_ROOT/build-android
    ```
* Artifacts for checking IO information (see the sketch after this list):
    - `output_pte/quicksrnetlarge_quantized/quicksrnetlarge_quantized.json`
    - `output_pte/quicksrnetlarge_quantized/quicksrnetlarge_quantized.svg`
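The JSON artifact can also be inspected programmatically. A minimal sketch, assuming the schema exercised by the new `test_utils_export` test (top-level `inputs` / `outputs` lists whose entries carry a `name` field):
```python
# Sketch: printing graph IO names from the generated JSON artifact.
# Schema assumption: "inputs" / "outputs" lists with "name" fields,
# mirroring how test_utils_export reads graph_info["outputs"].
import json

with open("output_pte/quicksrnetlarge_quantized/quicksrnetlarge_quantized.json") as f:
    graph_info = json.load(f)

for direction in ("inputs", "outputs"):
    for tensor in graph_info.get(direction, []):
        print(direction, tensor["name"])
```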
### Executing Program

* Prepare a test image:
    ```bash
    cd $MY_WS
    wget https://user-images.githubusercontent.com/12981474/40157448-eff91f06-5953-11e8-9a37-f6b5693fa03f.png -O baboon.png
    ```
    Execute the following Python script to generate input data:
    ```python
    import torch
    import torchvision.transforms as transforms
    from PIL import Image

    img = Image.open('baboon.png').resize((128, 128))
    transform = transforms.Compose([transforms.PILToTensor()])
    # convert (C, H, W) to (N, H, W, C)
    # IO tensor info can be checked with quicksrnetlarge_quantized.json | .svg
    img = transform(img).permute(1, 2, 0).unsqueeze(0)
    torch.save(img, 'baboon.pt')
    ```
* Execute the .pte program:
    ```bash
    PYTHONPATH=$EXECUTORCH_ROOT/.. python $EXECUTORCH_ROOT/examples/qualcomm/qaihub_scripts/utils/export.py execute -p output_pte/quicksrnetlarge_quantized -i baboon.pt -s $DEVICE_SERIAL -b $EXECUTORCH_ROOT/build-android
    ```
* Post-process the generated data:
    ```bash
    cd output_data
    ```
    Execute the following Python script to generate the output image:
    ```python
    import io
    import torch
    import torchvision.transforms as transforms

    # IO tensor info can be checked with quicksrnetlarge_quantized.json | .svg
    # generally input / output tensors share the same layout, e.g. either NHWC or NCHW,
    # but this might not hold under different converter configurations;
    # learn more with the converter tool from the Qualcomm AI Engine Direct documentation:
    # https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/tools.html#model-conversion
    with open('output__142.pt', 'rb') as f:
        buffer = io.BytesIO(f.read())
    img = torch.load(buffer, weights_only=False)
    transform = transforms.Compose([transforms.ToPILImage()])
    img_pil = transform(img.squeeze(0))
    img_pil.save('baboon_upscaled.png')
    ```
    You can check the upscaled result now!
## Help

Please check the help messages for more information:
```bash
PYTHONPATH=$EXECUTORCH_ROOT/.. python $EXECUTORCH_ROOT/examples/qualcomm/qaihub_scripts/utils/export.py -h
PYTHONPATH=$EXECUTORCH_ROOT/.. python $EXECUTORCH_ROOT/examples/qualcomm/qaihub_scripts/utils/export.py compile -h
PYTHONPATH=$EXECUTORCH_ROOT/.. python $EXECUTORCH_ROOT/examples/qualcomm/qaihub_scripts/utils/export.py execute -h
```
