Skip to content

Commit a89479d

Browse files
committed
Qualcomm AI Engine Direct - context dump utility
summary: - utility for dumping compiled binaries (QNN context_binary / QCIR) - test cases
1 parent b1d76c9 commit a89479d

File tree

5 files changed

+283
-2
lines changed

5 files changed

+283
-2
lines changed

backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
4949
.def("GetSpillFillBufferSize", &PyQnnManager::GetSpillFillBufferSize)
5050
.def(
5151
"MakeBinaryInfo",
52-
py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo));
52+
py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo))
53+
.def("StripProtocol", &PyQnnManager::StripProtocol);
5354
}
5455
} // namespace qnn
5556
} // namespace backends

backends/qualcomm/aot/python/PyQnnManagerAdaptor.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,41 @@ class PyQnnManager {
390390
return result;
391391
}
392392

393+
/**
 * Strip the custom-protocol wrapper from a preprocessed binary and return a
 * copy of the embedded payload.
 *
 * The buffer is first parsed with
 * QnnContextCustomProtocol::DeserializeContextCustomBuffer; if that does not
 * report Error::Ok it is parsed with
 * QnnQcirCustomProtocol::DeserializeQcirCustomBuffer, in which case only the
 * flatbuffer section (not the tensor section) is returned.
 *
 * @param preprocessed_binary Python bytes object holding the wrapped binary.
 * @return A freshly allocated array with the context binary or the QCIR
 *     flatbuffer. An empty array is returned when neither parser accepts the
 *     buffer (the format should be DLC in that case) or when the parsed
 *     payload is empty.
 */
py::array_t<char> StripProtocol(const py::bytes& preprocessed_binary) {
  py::buffer_info info(py::buffer(preprocessed_binary).request());

  const void* payload_ptr = nullptr;
  size_t payload_size = 0;

  // check if it's a qnn context binary
  auto [ctx_status, signature, ctx_size, ctx_bin] =
      QnnContextCustomProtocol().DeserializeContextCustomBuffer(info.ptr);

  if (ctx_status == Error::Ok) {
    payload_size = ctx_size;
    payload_ptr = ctx_bin;
  } else {
    // check if it's a qcir flatbuffers, return fbs if matched
    // (distinct binding names: the original re-declared `status` here and
    // shadowed the outer result)
    auto
        [qcir_status,
         qcir_fbs_size,
         qcir_tensor_size,
         qcir_fbs_ptr,
         qcir_tensor_ptr] =
            QnnQcirCustomProtocol().DeserializeQcirCustomBuffer(info.ptr);
    if (qcir_status != Error::Ok) {
      // the format should be DLC, return nothing here
      return py::array_t<char>(0);
    }
    payload_size = qcir_fbs_size;
    payload_ptr = qcir_fbs_ptr;
  }

  // std::memcpy requires valid pointers even for a zero-length copy, so an
  // empty or missing payload is reported as an empty array instead.
  if (payload_ptr == nullptr || payload_size == 0) {
    return py::array_t<char>(0);
  }

  auto result = py::array_t<char>(payload_size);
  auto result_buffer = result.request();
  std::memcpy(result_buffer.ptr, payload_ptr, payload_size);
  return result;
}
427+
393428
private:
394429
// Store the bytes object instead of a raw pointer so that this module will
395430
// keep the bytes alive.

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 157 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
QuantDtype,
2121
TestQNN,
2222
to_backend,
23+
validate_context_binary,
24+
validate_qcir,
2325
)
2426
from executorch.backends.qualcomm.utils.constants import (
2527
QCOM_ANNOTATION,
@@ -30,10 +32,12 @@
3032

3133
from executorch.backends.qualcomm.utils.utils import (
3234
capture_program,
35+
dump_context_from_pte,
3336
from_context_binary,
3437
generate_htp_compiler_spec,
3538
generate_multi_graph_program,
3639
generate_qnn_executorch_compiler_spec,
40+
PyQnnManagerAdaptor,
3741
skip_annotation,
3842
update_spill_fill_size,
3943
)
@@ -2041,6 +2045,81 @@ def test_qnn_backend_context_direct(self):
20412045
bundle_program["edge_program_manager"].to_executorch(),
20422046
)
20432047

2048+
def test_qnn_backend_context_extraction(self):
    """Lower SimpleModel with two compile specs and validate what
    StripProtocol extracts from each lowered module's processed bytes."""
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    model = SimpleModel()  # noqa: F405
    inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    htp_options = generate_htp_compiler_spec(use_fp16=True)
    online_prepare_specs = generate_qnn_executorch_compiler_spec(
        soc_model=self.chipset_table[TestQNN.model],
        backend_options=htp_options,
        online_prepare=True,
    )
    # Each compile spec is paired with the validator applied to its payload.
    cases = (
        (self.compiler_specs, validate_context_binary),
        (online_prepare_specs, validate_qcir),
    )

    for specs, check_payload in cases:
        exported = capture_program(model, inputs).exported_program
        manager = EdgeProgramManager(
            edge_programs={"forward": exported},
            compile_config=EdgeCompileConfig(_use_edge_ops=False),
        )
        lowered_program = manager.to_backend(QnnPartitioner(specs))
        graph_module = lowered_program.exported_program().graph_module
        delegate = graph_module._modules["lowered_module_0"]

        qnn_mgr = PyQnnManagerAdaptor.QnnManager(delegate.compile_specs[0].value)
        qnn_mgr.Init()
        check_payload(qnn_mgr.StripProtocol(delegate.processed_bytes))
2080+
2081+
def test_qnn_backend_dump_context_from_pte(self):
    """Serialize a lowered SimpleModel to a .pte file, dump its delegate
    payload with dump_context_from_pte and validate the dumped file."""
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    model = SimpleModel()  # noqa: F405
    inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    htp_options = generate_htp_compiler_spec(use_fp16=True)
    online_prepare_specs = generate_qnn_executorch_compiler_spec(
        soc_model=self.chipset_table[TestQNN.model],
        backend_options=htp_options,
        online_prepare=True,
    )
    # Each compile spec is paired with the validator applied to its dump.
    cases = (
        (self.compiler_specs, validate_context_binary),
        (online_prepare_specs, validate_qcir),
    )

    for specs, check_payload in cases:
        exported = capture_program(model, inputs).exported_program
        edge_manager = EdgeProgramManager(
            edge_programs={"forward": exported},
            compile_config=EdgeCompileConfig(_use_edge_ops=False),
        )
        executorch_program = edge_manager.to_backend(
            QnnPartitioner(specs)
        ).to_executorch()

        with tempfile.TemporaryDirectory() as tmp_dir:
            pte_path = f"{tmp_dir}/model.pte"
            with open(pte_path, "wb") as pte_file:
                executorch_program.write_to_file(pte_file)

            dump_context_from_pte(pte_path)

            # The dump is expected next to the .pte as "<plan name>_<index>.bin".
            binary_name = f"{tmp_dir}/forward_0.bin"
            self.assertTrue(os.path.isfile(binary_name))
            with open(binary_name, "rb") as dumped:
                check_payload(dumped.read())
2122+
20442123
def test_qnn_backend_draw_graph(self):
20452124
golden_data = """digraph test {
20462125
rankdir=TB
@@ -2433,7 +2512,7 @@ def test_qnn_backend_multi_graphs(self):
24332512
for module, sample_input in zip(modules, sample_inputs)
24342513
]
24352514
backend_options = generate_htp_compiler_spec(
2436-
use_fp16=True,
2515+
use_fp16=False,
24372516
)
24382517
compiler_specs = [
24392518
generate_qnn_executorch_compiler_spec(
@@ -2532,6 +2611,83 @@ def test_qnn_backend_context_direct(self):
25322611
bundle_program["edge_program_manager"].to_executorch(),
25332612
)
25342613

2614+
def test_qnn_backend_context_extraction(self):
    """Lower the QDQ version of SimpleModel with two compile specs and
    validate what StripProtocol extracts from each lowered module."""
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    qdq_model = self.get_qdq_module(SimpleModel(), inputs)  # noqa: F405
    htp_options = generate_htp_compiler_spec(use_fp16=False)
    online_prepare_specs = generate_qnn_executorch_compiler_spec(
        soc_model=self.chipset_table[TestQNN.model],
        backend_options=htp_options,
        online_prepare=True,
    )
    # Each compile spec is paired with the validator applied to its payload.
    cases = (
        (self.compiler_specs, validate_context_binary),
        (online_prepare_specs, validate_qcir),
    )

    for specs, check_payload in cases:
        exported = capture_program(qdq_model, inputs).exported_program
        manager = EdgeProgramManager(
            edge_programs={"forward": exported},
            compile_config=EdgeCompileConfig(_use_edge_ops=False),
        )
        lowered_program = manager.to_backend(QnnPartitioner(specs))
        graph_module = lowered_program.exported_program().graph_module
        delegate = graph_module._modules["lowered_module_0"]

        qnn_mgr = PyQnnManagerAdaptor.QnnManager(delegate.compile_specs[0].value)
        qnn_mgr.Init()
        check_payload(qnn_mgr.StripProtocol(delegate.processed_bytes))
2647+
2648+
def test_qnn_backend_dump_context_from_pte(self):
    """Serialize a lowered QDQ SimpleModel to a .pte file, dump its delegate
    payload with dump_context_from_pte and validate the dumped file."""
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    module = SimpleModel()  # noqa: F405
    sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    module = self.get_qdq_module(module, sample_input)
    # The module is quantized here, so use_fp16=False keeps this test
    # consistent with the quantized test_qnn_backend_context_extraction.
    backend_options = generate_htp_compiler_spec(use_fp16=False)
    compiler_specs = [
        self.compiler_specs,
        generate_qnn_executorch_compiler_spec(
            soc_model=self.chipset_table[TestQNN.model],
            backend_options=backend_options,
            online_prepare=True,
        ),
    ]
    # validators[i] is applied to the dump produced with compiler_specs[i].
    validators = [validate_context_binary, validate_qcir]

    for compiler_spec, validate in zip(compiler_specs, validators):
        edge_prog_mgr = (
            EdgeProgramManager(
                edge_programs={
                    "forward": capture_program(
                        module, sample_input
                    ).exported_program
                },
                compile_config=EdgeCompileConfig(_use_edge_ops=False),
            )
            .to_backend(QnnPartitioner(compiler_spec))
            .to_executorch()
        )

        with tempfile.TemporaryDirectory() as tmp_dir:
            pte_path = f"{tmp_dir}/model.pte"
            with open(pte_path, "wb") as f:
                edge_prog_mgr.write_to_file(f)

            dump_context_from_pte(pte_path)

            # The dump is expected next to the .pte as "<plan name>_<index>.bin".
            binary_name = f"{tmp_dir}/forward_0.bin"
            self.assertTrue(os.path.isfile(binary_name))
            with open(binary_name, "rb") as f:
                stripped_binary = f.read()
                validate(stripped_binary)
2690+
25352691
def test_qnn_backend_draw_graph(self):
25362692
golden_data = """digraph test {
25372693
rankdir=TB

backends/qualcomm/tests/utils.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,57 @@ def generate_context_binary(
108108
assert os.path.isfile(f"{artifact_dir}/model_ctx.bin"), print(result.stderr)
109109

110110

111+
def validate_context_binary(ctx_bin: bytes):
    """Check a context binary by running qnn-context-binary-utility on it.

    The payload is written to a temporary directory and the tool from
    ``$QNN_SDK_ROOT/bin/x86_64-linux-clang`` is asked to emit a JSON
    description of it; validation passes when that JSON file exists.

    Args:
        ctx_bin: Payload written verbatim to the temporary ``ctx.bin``.

    Raises:
        AssertionError: If QNN_SDK_ROOT is unset, or if the tool did not
            produce the JSON file (the message carries the tool's stderr).
    """
    import shlex

    qnn_sdk = os.environ.get("QNN_SDK_ROOT", None)
    assert qnn_sdk, "QNN_SDK_ROOT was not found in environment variable"

    # flow of qnn tools
    with tempfile.TemporaryDirectory() as tmp_dir:
        ctx_path = f"{tmp_dir}/ctx.bin"
        json_path = f"{tmp_dir}/ctx.json"
        with open(ctx_path, "wb") as binary_file:
            binary_file.write(ctx_bin)

        target = "x86_64-linux-clang"
        cmds = [
            # qnn-context-binary-utility
            f"{qnn_sdk}/bin/{target}/qnn-context-binary-utility",
            "--context_binary",
            ctx_path,
            "--json_file",
            json_path,
        ]
        result = subprocess.run(
            # shlex.join quotes each argument so paths with spaces survive
            # the trip through the shell.
            shlex.join(cmds),
            shell=True,
            executable="/bin/bash",
            capture_output=True,
        )
        # Put stderr in the assertion message itself; print() returns None,
        # which previously left the AssertionError without any message.
        assert os.path.isfile(json_path), (
            "qnn-context-binary-utility did not produce ctx.json:\n"
            f"{result.stderr.decode(errors='replace')}"
        )
136+
137+
138+
def validate_qcir(qcir: bytes):
    """Check a QCIR flatbuffer by converting it to JSON with ``flatc``.

    The payload is written to a temporary directory and ``flatc`` is run
    against the ``qcir.fbs`` schema located relative to this file;
    validation passes when ``qcir.json`` shows up in that directory.

    Args:
        qcir: Payload written verbatim to the temporary ``qcir.bin``.

    Raises:
        AssertionError: If ``flatc`` did not produce ``qcir.json`` (the
            message carries the captured stderr and stdout).
    """
    import shlex

    with tempfile.TemporaryDirectory() as tmp_dir:
        qcir_path = f"{tmp_dir}/qcir.bin"
        with open(qcir_path, "wb") as binary_file:
            binary_file.write(qcir)

        cmds = [
            "flatc",
            "-o",
            tmp_dir,
            "--raw-binary",
            "-t",
            f"{os.path.dirname(__file__)}/../aot/ir/qcir.fbs",
            "--",
            qcir_path,
        ]
        result = subprocess.run(
            # shlex.join quotes each argument so paths with spaces survive
            # the trip through the shell.
            shlex.join(cmds),
            shell=True,
            executable="/bin/bash",
            capture_output=True,
        )
        # Put the tool output in the assertion message itself; print()
        # returns None, which previously left the AssertionError empty.
        assert os.path.isfile(f"{tmp_dir}/qcir.json"), (
            "flatc did not produce qcir.json:\n"
            f"{result.stderr.decode(errors='replace')}"
            f"{result.stdout.decode(errors='replace')}"
        )
160+
161+
111162
class TestQNN(unittest.TestCase):
112163
rtol: float = 0
113164
atol: float = 0

backends/qualcomm/utils/utils.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,44 @@ def replace_linear(module: torch.nn.Module):
218218
return replace_linear(module)
219219

220220

221+
def dump_context_from_pte(pte_path):
    """
    Dump compiled binaries under the same directory of pte_path.
    For partitioned graph, there will be multiple files with names
    f"{graph_name}_{index}.bin", where 'graph_name' is the name of the
    execution plan and 'index' is the delegate's position within that plan.
    Only delegates whose id is "QnnBackend" are dumped.

    Args:
        pte_path (str): The path of generated pte.
    """
    import os

    from executorch.exir._serialize._program import deserialize_pte_binary

    with open(pte_path, "rb") as pte_file:
        program_data = pte_file.read()

    program = deserialize_pte_binary(program_data)

    # Resolve to an absolute directory first: for a bare filename
    # os.path.dirname() returns "", which would otherwise turn the output
    # path into "/<name>_<i>.bin" at the filesystem root.
    ctx_path = os.path.dirname(os.path.abspath(pte_path))
    # These specs are only used to construct and initialize the manager that
    # exposes StripProtocol; they are not derived from the pte.
    dummy_compiler_specs = generate_qnn_executorch_compiler_spec(
        soc_model=QcomChipset.SM8650,
        backend_options=generate_htp_compiler_spec(use_fp16=False),
    )
    qnn_mgr = PyQnnManagerAdaptor.QnnManager(
        generate_qnn_executorch_option(dummy_compiler_specs)
    )
    qnn_mgr.Init()
    for execution_plan in program.execution_plan:
        for i, delegate in enumerate(execution_plan.delegates):
            if delegate.id == "QnnBackend":
                processed_bytes = program.backend_delegate_data[
                    delegate.processed.index
                ].data
                binary = qnn_mgr.StripProtocol(processed_bytes)
                ctx_file_path = os.path.join(
                    ctx_path, f"{execution_plan.name}_{i}.bin"
                )
                with open(ctx_file_path, "wb") as ctx_file:
                    ctx_file.write(binary)
257+
258+
221259
def update_spill_fill_size(
222260
exported_program: ExportedProgram | List[LoweredBackendModule],
223261
):

0 commit comments

Comments
 (0)