Skip to content

Commit 9fc0b96

Browse files
committed
Qualcomm AI Engine Direct - context dump utility
summary: - utility for dumping compiled binaries (QNN context_binary / QCIR) - test cases
1 parent 62e49ce commit 9fc0b96

File tree

5 files changed

+285
-2
lines changed

5 files changed

+285
-2
lines changed

backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
4949
.def("GetSpillFillBufferSize", &PyQnnManager::GetSpillFillBufferSize)
5050
.def(
5151
"MakeBinaryInfo",
52-
py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo));
52+
py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo))
53+
.def("StripProtocol", &PyQnnManager::StripProtocol);
5354
}
5455
} // namespace qnn
5556
} // namespace backends

backends/qualcomm/aot/python/PyQnnManagerAdaptor.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,43 @@ class PyQnnManager {
390390
return result;
391391
}
392392

393+
/// Strips the custom-protocol wrapper from a delegate payload and returns a
/// copy of the compiled binary it carries.
///
/// The payload is probed as a QNN context binary first and as a QCIR
/// flatbuffer second; the first protocol that deserializes with Error::Ok
/// decides which region is copied out.
///
/// @param preprocessed_binary Serialized delegate payload (for example the
///     processed bytes of a lowered module).
/// @return The context binary or the QCIR flatbuffer section as a char array;
///     an empty array when neither protocol matches or the section is empty.
py::array_t<char> StripProtocol(const py::bytes& preprocessed_binary) {
  py::buffer_info info(py::buffer(preprocessed_binary).request());

  const void* payload_ptr = nullptr;
  size_t payload_size = 0;

  // check if it's a qnn context binary
  auto [ctx_status, signature, ctx_size, ctx_bin] =
      QnnContextCustomProtocol().DeserializeContextCustomBuffer(info.ptr);

  if (ctx_status == Error::Ok) {
    payload_size = ctx_size;
    payload_ptr = ctx_bin;
  } else {
    // check if it's a qcir flatbuffers, return fbs if matched
    auto
        [qcir_status,
         qcir_fbs_size,
         qcir_tensor_size,
         qcir_fbs_ptr,
         qcir_tensor_ptr] =
            QnnQcirCustomProtocol().DeserializeQcirCustomBuffer(info.ptr);
    if (qcir_status != Error::Ok) {
      // the format should be DLC, return nothing here
      return py::array_t<char>(0);
    }
    payload_size = qcir_fbs_size;
    payload_ptr = qcir_fbs_ptr;
  }

  // memcpy with a null source is undefined even for a zero length, so bail out
  // with an empty array instead of touching the pointer.
  if (payload_ptr == nullptr || payload_size == 0) {
    return py::array_t<char>(0);
  }

  auto result = py::array_t<char>(payload_size);
  auto result_buffer = result.request();
  std::memcpy(result_buffer.ptr, payload_ptr, payload_size);
  return result;
}
429+
393430
private:
394431
// Store the bytes object instead of a raw pointer so that this module will
395432
// keep the bytes alive.

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 157 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
QuantDtype,
2121
TestQNN,
2222
to_backend,
23+
validate_context_binary,
24+
validate_qcir,
2325
)
2426
from executorch.backends.qualcomm.utils.constants import (
2527
QCOM_ANNOTATION,
@@ -30,10 +32,12 @@
3032

3133
from executorch.backends.qualcomm.utils.utils import (
3234
capture_program,
35+
dump_context_from_pte,
3336
from_context_binary,
3437
generate_htp_compiler_spec,
3538
generate_multi_graph_program,
3639
generate_qnn_executorch_compiler_spec,
40+
PyQnnManagerAdaptor,
3741
skip_annotation,
3842
update_spill_fill_size,
3943
)
@@ -2030,6 +2034,81 @@ def test_qnn_backend_context_direct(self):
20302034
bundle_program["edge_program_manager"].to_executorch(),
20312035
)
20322036

2037+
def test_qnn_backend_context_extraction(self):
    # Lower SimpleModel twice and check that StripProtocol hands back a payload
    # the matching validator accepts: the default specs are paired with the
    # context-binary validator, the online-prepare specs with the QCIR one.
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    model = SimpleModel()  # noqa: F405
    inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    online_prepare_specs = generate_qnn_executorch_compiler_spec(
        soc_model=self.chipset_table[TestQNN.model],
        backend_options=generate_htp_compiler_spec(use_fp16=True),
        online_prepare=True,
    )
    cases = (
        (self.compiler_specs, validate_context_binary),
        (online_prepare_specs, validate_qcir),
    )

    for specs, check in cases:
        manager = EdgeProgramManager(
            edge_programs={
                "forward": capture_program(model, inputs).exported_program
            },
            compile_config=EdgeCompileConfig(_use_edge_ops=False),
        )
        lowered_program = manager.to_backend(QnnPartitioner(specs))
        submodules = lowered_program.exported_program().graph_module._modules
        delegate = submodules["lowered_module_0"]

        # The manager is configured from the delegate's own first compile spec.
        stripper = PyQnnManagerAdaptor.QnnManager(delegate.compile_specs[0].value)
        stripper.Init()
        check(stripper.StripProtocol(delegate.processed_bytes))
2069+
2070+
def test_qnn_backend_dump_context_from_pte(self):
    # Serialize the lowered program to a .pte inside a scratch directory, dump
    # its delegate payloads next to it, and validate the first dumped binary.
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    model = SimpleModel()  # noqa: F405
    inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    online_prepare_specs = generate_qnn_executorch_compiler_spec(
        soc_model=self.chipset_table[TestQNN.model],
        backend_options=generate_htp_compiler_spec(use_fp16=True),
        online_prepare=True,
    )
    cases = (
        (self.compiler_specs, validate_context_binary),
        (online_prepare_specs, validate_qcir),
    )

    for specs, check in cases:
        manager = EdgeProgramManager(
            edge_programs={
                "forward": capture_program(model, inputs).exported_program
            },
            compile_config=EdgeCompileConfig(_use_edge_ops=False),
        )
        executorch_program = manager.to_backend(
            QnnPartitioner(specs)
        ).to_executorch()

        with tempfile.TemporaryDirectory() as tmp_dir:
            pte_path = f"{tmp_dir}/model.pte"
            with open(pte_path, "wb") as pte_file:
                executorch_program.write_to_file(pte_file)

            dump_context_from_pte(pte_path)

            # The dump is expected at "<method name>_<delegate index>.bin".
            binary_name = f"{tmp_dir}/forward_0.bin"
            self.assertTrue(os.path.isfile(binary_name))
            with open(binary_name, "rb") as dumped:
                check(dumped.read())
2111+
20332112
def test_qnn_backend_draw_graph(self):
20342113
golden_data = """digraph test {
20352114
rankdir=TB
@@ -2422,7 +2501,7 @@ def test_qnn_backend_multi_graphs(self):
24222501
for module, sample_input in zip(modules, sample_inputs)
24232502
]
24242503
backend_options = generate_htp_compiler_spec(
2425-
use_fp16=True,
2504+
use_fp16=False,
24262505
)
24272506
compiler_specs = [
24282507
generate_qnn_executorch_compiler_spec(
@@ -2521,6 +2600,83 @@ def test_qnn_backend_context_direct(self):
25212600
bundle_program["edge_program_manager"].to_executorch(),
25222601
)
25232602

2603+
def test_qnn_backend_context_extraction(self):
    # Quantized variant: the model goes through get_qdq_module before lowering.
    # The default specs are paired with the context-binary validator and the
    # online-prepare specs with the QCIR validator.
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    float_model = SimpleModel()  # noqa: F405
    qdq_model = self.get_qdq_module(float_model, inputs)
    online_prepare_specs = generate_qnn_executorch_compiler_spec(
        soc_model=self.chipset_table[TestQNN.model],
        backend_options=generate_htp_compiler_spec(use_fp16=False),
        online_prepare=True,
    )
    cases = (
        (self.compiler_specs, validate_context_binary),
        (online_prepare_specs, validate_qcir),
    )

    for specs, check in cases:
        manager = EdgeProgramManager(
            edge_programs={
                "forward": capture_program(qdq_model, inputs).exported_program
            },
            compile_config=EdgeCompileConfig(_use_edge_ops=False),
        )
        lowered_program = manager.to_backend(QnnPartitioner(specs))
        submodules = lowered_program.exported_program().graph_module._modules
        delegate = submodules["lowered_module_0"]

        # The manager is configured from the delegate's own first compile spec.
        stripper = PyQnnManagerAdaptor.QnnManager(delegate.compile_specs[0].value)
        stripper.Init()
        check(stripper.StripProtocol(delegate.processed_bytes))
2636+
2637+
def test_qnn_backend_dump_context_from_pte(self):
    # Quantized variant: serialize the lowered QDQ program to a .pte, dump its
    # delegate payloads next to it, and validate the first dumped binary.
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    module = SimpleModel()  # noqa: F405
    sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    module = self.get_qdq_module(module, sample_input)
    # The module is quantized, so fp16 is disabled here, matching the
    # quantized context-extraction test.
    backend_options = generate_htp_compiler_spec(use_fp16=False)
    compiler_specs = [
        self.compiler_specs,
        generate_qnn_executorch_compiler_spec(
            soc_model=self.chipset_table[TestQNN.model],
            backend_options=backend_options,
            online_prepare=True,
        ),
    ]
    # Pairing is positional: default specs -> context binary,
    # online-prepare specs -> QCIR.
    validators = [validate_context_binary, validate_qcir]

    for compiler_spec, validate in zip(compiler_specs, validators):
        edge_prog_mgr = (
            EdgeProgramManager(
                edge_programs={
                    "forward": capture_program(
                        module, sample_input
                    ).exported_program
                },
                compile_config=EdgeCompileConfig(_use_edge_ops=False),
            )
            .to_backend(QnnPartitioner(compiler_spec))
            .to_executorch()
        )

        with tempfile.TemporaryDirectory() as tmp_dir:
            pte_path = f"{tmp_dir}/model.pte"
            with open(pte_path, "wb") as pte_file:
                edge_prog_mgr.write_to_file(pte_file)

            dump_context_from_pte(pte_path)

            # The dump is expected at "<method name>_<delegate index>.bin".
            binary_name = f"{tmp_dir}/forward_0.bin"
            self.assertTrue(os.path.isfile(binary_name))
            with open(binary_name, "rb") as dumped_file:
                stripped_binary = dumped_file.read()
            validate(stripped_binary)
2679+
25242680
def test_qnn_backend_draw_graph(self):
25252681
golden_data = """digraph test {
25262682
rankdir=TB

backends/qualcomm/tests/utils.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,57 @@ def generate_context_binary(
108108
assert os.path.isfile(f"{artifact_dir}/model_ctx.bin"), print(result.stderr)
109109

110110

111+
def validate_context_binary(ctx_bin: bytes):
    """
    Check that the given payload is accepted by qnn-context-binary-utility.

    The payload is written to a temporary file and handed to the QNN SDK tool,
    which is asked to emit a JSON description of it. The check passes when that
    JSON file exists afterwards.

    Args:
        ctx_bin (bytes): Candidate QNN context binary.

    Raises:
        AssertionError: If QNN_SDK_ROOT is not set, or if the tool did not
            produce the JSON file (the tool's stderr is included in the message).
    """
    import shlex

    qnn_sdk = os.environ.get("QNN_SDK_ROOT", None)
    assert qnn_sdk, "QNN_SDK_ROOT was not found in environment variable"

    # flow of qnn tools
    with tempfile.TemporaryDirectory() as tmp_dir:
        ctx_path = f"{tmp_dir}/ctx.bin"
        json_path = f"{tmp_dir}/ctx.json"
        with open(ctx_path, "wb") as binary_file:
            binary_file.write(ctx_bin)

        target = "x86_64-linux-clang"
        cmds = [
            # qnn-context-binary-utility
            f"{qnn_sdk}/bin/{target}/qnn-context-binary-utility",
            "--context_binary",
            ctx_path,
            "--json_file",
            json_path,
        ]
        result = subprocess.run(
            # Quote every argument so paths containing spaces survive the shell.
            shlex.join(cmds),
            shell=True,
            executable="/bin/bash",
            capture_output=True,
        )
        # Put stderr in the assertion message itself; print() would return None
        # and leave the failure without any diagnostics attached.
        assert os.path.isfile(json_path), (
            "qnn-context-binary-utility did not produce ctx.json:\n"
            + result.stderr.decode(errors="replace")
        )
136+
137+
138+
def validate_qcir(qcir: bytes):
    """
    Check that the given payload can be decoded by flatc with the qcir schema.

    The payload is written to a temporary file and flatc is asked to convert it
    to JSON using ../aot/ir/qcir.fbs (resolved relative to this file). The check
    passes when qcir.json exists afterwards.

    Args:
        qcir (bytes): Candidate QCIR flatbuffer.

    Raises:
        AssertionError: If flatc did not produce the JSON file (flatc's stderr
            is included in the message).
    """
    import shlex

    with tempfile.TemporaryDirectory() as tmp_dir:
        qcir_path = f"{tmp_dir}/qcir.bin"
        with open(qcir_path, "wb") as binary_file:
            binary_file.write(qcir)

        cmds = [
            "flatc",
            "-o",
            tmp_dir,
            "--raw-binary",
            "-t",
            f"{os.path.dirname(__file__)}/../aot/ir/qcir.fbs",
            "--",
            qcir_path,
        ]
        result = subprocess.run(
            # Quote every argument so paths containing spaces survive the shell.
            shlex.join(cmds),
            shell=True,
            executable="/bin/bash",
            capture_output=True,
        )
        # Put stderr in the assertion message itself; print() would return None
        # and leave the failure without any diagnostics attached.
        assert os.path.isfile(f"{tmp_dir}/qcir.json"), (
            "flatc did not produce qcir.json:\n"
            + result.stderr.decode(errors="replace")
        )
160+
161+
111162
class TestQNN(unittest.TestCase):
112163
rtol: float = 0
113164
atol: float = 0

backends/qualcomm/utils/utils.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,44 @@ def replace_linear(module: torch.nn.Module):
218218
return replace_linear(module)
219219

220220

221+
def dump_context_from_pte(pte_path):
    """
    Dump compiled binaries under the same directory of pte_path.
    For partitioned graph, there will be multiple files with names f"{graph_name}_{index}.bin".
    Where 'graph_name' is the name of the execution plan and 'index' is the position of
    the delegate within that plan's delegate list.

    Only delegates whose id is "QnnBackend" are dumped; each payload is passed through
    StripProtocol before being written.

    Args:
        pte_path (str): The path of generated pte.
    """
    import os

    from executorch.exir._serialize._program import deserialize_pte_binary

    with open(pte_path, "rb") as pte_file:
        program_data = pte_file.read()

    program = deserialize_pte_binary(program_data)

    # Resolve to an absolute path first: dirname("model.pte") is "", which would
    # otherwise make the dumps land in the filesystem root instead of the cwd.
    ctx_path = os.path.dirname(os.path.abspath(pte_path))
    # StripProtocol only parses the payload header, but a QnnManager instance is
    # needed to call it, so build one from placeholder compiler specs.
    dummy_compiler_specs = generate_qnn_executorch_compiler_spec(
        soc_model=QcomChipset.SM8650,
        backend_options=generate_htp_compiler_spec(use_fp16=False),
    )
    qnn_mgr = PyQnnManagerAdaptor.QnnManager(
        generate_qnn_executorch_option(dummy_compiler_specs)
    )
    qnn_mgr.Init()
    for execution_plan in program.execution_plan:
        for i, delegate in enumerate(execution_plan.delegates):
            if delegate.id != "QnnBackend":
                continue
            processed_bytes = program.backend_delegate_data[
                delegate.processed.index
            ].data
            binary = qnn_mgr.StripProtocol(processed_bytes)
            dump_path = os.path.join(ctx_path, f"{execution_plan.name}_{i}.bin")
            with open(dump_path, "wb") as dump_file:
                dump_file.write(binary)
257+
258+
221259
def update_spill_fill_size(
222260
exported_program: ExportedProgram | List[LoweredBackendModule],
223261
):

0 commit comments

Comments
 (0)