Skip to content

Commit c4dfc68

Browse files
committed
Qualcomm AI Engine Direct - context dump utility
summary: - utility for dumping compiled binaries (QNN context_binary / QCIR) - test cases
1 parent a5c7609 commit c4dfc68

File tree

5 files changed

+285
-2
lines changed

5 files changed

+285
-2
lines changed

backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
4949
.def("GetSpillFillBufferSize", &PyQnnManager::GetSpillFillBufferSize)
5050
.def(
5151
"MakeBinaryInfo",
52-
py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo));
52+
py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo))
53+
.def("StripProtocol", &PyQnnManager::StripProtocol);
5354
}
5455
} // namespace qnn
5556
} // namespace backends

backends/qualcomm/aot/python/PyQnnManagerAdaptor.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,43 @@ class PyQnnManager {
390390
return result;
391391
}
392392

393+
/**
 * Strip the custom protocol header from a blob and return a copy of the
 * payload it wraps.
 *
 * The blob is probed first with QnnContextCustomProtocol (QNN context binary)
 * and then with QnnQcirCustomProtocol (QCIR flatbuffers); the first
 * deserializer reporting Error::Ok decides which pointer / size pair is
 * copied out.
 *
 * @param preprocessed_binary Blob to inspect.
 * @return A newly allocated array holding a copy of the payload. An empty
 *     array is returned when neither protocol matches (the format should be
 *     DLC in that case) or when the size announced by the header is larger
 *     than the blob itself.
 */
py::array_t<char> StripProtocol(const py::bytes& preprocessed_binary) {
  py::buffer_info info(py::buffer(preprocessed_binary).request());
  const uint64_t total_bytes =
      static_cast<uint64_t>(info.size * info.itemsize);

  void* buf_ptr = nullptr;
  size_t buf_size = 0;
  // check if it's a qnn context binary
  auto [ctx_status, signature, ctx_size, ctx_bin] =
      QnnContextCustomProtocol().DeserializeContextCustomBuffer(info.ptr);

  if (ctx_status == Error::Ok) {
    buf_size = ctx_size;
    buf_ptr = ctx_bin;
  } else {
    // check if it's a qcir flatbuffers, return fbs if matched
    // (distinct status name so the outer binding is not shadowed)
    auto
        [qcir_status,
         qcir_fbs_size,
         qcir_tensor_size,
         qcir_fbs_ptr,
         qcir_tensor_ptr] =
            QnnQcirCustomProtocol().DeserializeQcirCustomBuffer(info.ptr);
    if (qcir_status == Error::Ok) {
      buf_size = qcir_fbs_size;
      buf_ptr = qcir_fbs_ptr;
    } else {
      // the format should be DLC, return nothing here
      return py::array_t<char>(0);
    }
  }

  // The payload is a sub-range of the input, so a header announcing more
  // bytes than the whole blob holds is corrupt; refuse to read past the end.
  if (buf_size > total_bytes) {
    return py::array_t<char>(0);
  }

  auto result = py::array_t<char>(buf_size);
  auto result_buffer = result.request();
  // memcpy must not be handed a null pointer, even for a zero-length copy.
  if (buf_size > 0 && buf_ptr != nullptr) {
    std::memcpy(result_buffer.ptr, buf_ptr, buf_size);
  }
  return result;
}
429+
393430
private:
394431
// Store the bytes object instead of a raw pointer so that this module will
395432
// keep the bytes alive.

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 157 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
QuantDtype,
2121
TestQNN,
2222
to_backend,
23+
validate_context_binary,
24+
validate_qcir,
2325
)
2426
from executorch.backends.qualcomm.utils.constants import (
2527
QCOM_ANNOTATION,
@@ -30,10 +32,12 @@
3032

3133
from executorch.backends.qualcomm.utils.utils import (
3234
capture_program,
35+
dump_context_from_pte,
3336
from_context_binary,
3437
generate_htp_compiler_spec,
3538
generate_multi_graph_program,
3639
generate_qnn_executorch_compiler_spec,
40+
PyQnnManagerAdaptor,
3741
skip_annotation,
3842
update_spill_fill_size,
3943
)
@@ -2019,6 +2023,81 @@ def test_qnn_backend_context_direct(self):
20192023
bundle_program["edge_program_manager"].to_executorch(),
20202024
)
20212025

2026+
def test_qnn_backend_context_extraction(self):
    """Lower SimpleModel with two compiler specs and validate what
    StripProtocol extracts from the lowered module's processed bytes."""
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    model = SimpleModel()  # noqa: F405
    inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    htp_options = generate_htp_compiler_spec(use_fp16=True)
    online_prepare_specs = generate_qnn_executorch_compiler_spec(
        soc_model=self.chipset_table[TestQNN.model],
        backend_options=htp_options,
        online_prepare=True,
    )
    # The default specs are checked as a context binary, the online-prepare
    # specs as QCIR.
    cases = (
        (self.compiler_specs, validate_context_binary),
        (online_prepare_specs, validate_qcir),
    )

    for specs, check in cases:
        exported = capture_program(model, inputs).exported_program
        edge_manager = EdgeProgramManager(
            edge_programs={"forward": exported},
            compile_config=EdgeCompileConfig(_use_edge_ops=False),
        )
        delegated = edge_manager.to_backend(QnnPartitioner(specs))
        graph_module = delegated.exported_program().graph_module
        lowered = graph_module._modules["lowered_module_0"]

        qnn_manager = PyQnnManagerAdaptor.QnnManager(lowered.compile_specs[0].value)
        qnn_manager.Init()
        check(qnn_manager.StripProtocol(lowered.processed_bytes))
2059+
def test_qnn_backend_dump_context_from_pte(self):
    """Write a lowered SimpleModel to a .pte file, dump its delegate payload
    with dump_context_from_pte and validate the dumped file."""
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    model = SimpleModel()  # noqa: F405
    inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    htp_options = generate_htp_compiler_spec(use_fp16=True)
    online_prepare_specs = generate_qnn_executorch_compiler_spec(
        soc_model=self.chipset_table[TestQNN.model],
        backend_options=htp_options,
        online_prepare=True,
    )
    # The default specs are checked as a context binary, the online-prepare
    # specs as QCIR.
    cases = (
        (self.compiler_specs, validate_context_binary),
        (online_prepare_specs, validate_qcir),
    )

    for specs, check in cases:
        exported = capture_program(model, inputs).exported_program
        edge_manager = EdgeProgramManager(
            edge_programs={"forward": exported},
            compile_config=EdgeCompileConfig(_use_edge_ops=False),
        )
        executorch_program = edge_manager.to_backend(
            QnnPartitioner(specs)
        ).to_executorch()

        with tempfile.TemporaryDirectory() as tmp_dir:
            pte_path = f"{tmp_dir}/model.pte"
            with open(pte_path, "wb") as pte_file:
                executorch_program.write_to_file(pte_file)

            dump_context_from_pte(pte_path)
            # The test expects the dump to be named "forward_0.bin".
            binary_name = f"{tmp_dir}/forward_0.bin"
            self.assertTrue(os.path.isfile(binary_name))
            with open(binary_name, "rb") as dumped:
                payload = dumped.read()
            check(payload)
2100+
20222101
def test_qnn_backend_draw_graph(self):
20232102
golden_data = """digraph test {
20242103
rankdir=TB
@@ -2411,7 +2490,7 @@ def test_qnn_backend_multi_graphs(self):
24112490
for module, sample_input in zip(modules, sample_inputs)
24122491
]
24132492
backend_options = generate_htp_compiler_spec(
2414-
use_fp16=True,
2493+
use_fp16=False,
24152494
)
24162495
compiler_specs = [
24172496
generate_qnn_executorch_compiler_spec(
@@ -2510,6 +2589,83 @@ def test_qnn_backend_context_direct(self):
25102589
bundle_program["edge_program_manager"].to_executorch(),
25112590
)
25122591

2592+
def test_qnn_backend_context_extraction(self):
    """Lower the QDQ version of SimpleModel with two compiler specs and
    validate what StripProtocol extracts from the processed bytes."""
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    float_model = SimpleModel()  # noqa: F405
    qdq_model = self.get_qdq_module(float_model, inputs)
    htp_options = generate_htp_compiler_spec(use_fp16=False)
    online_prepare_specs = generate_qnn_executorch_compiler_spec(
        soc_model=self.chipset_table[TestQNN.model],
        backend_options=htp_options,
        online_prepare=True,
    )
    # The default specs are checked as a context binary, the online-prepare
    # specs as QCIR.
    cases = (
        (self.compiler_specs, validate_context_binary),
        (online_prepare_specs, validate_qcir),
    )

    for specs, check in cases:
        exported = capture_program(qdq_model, inputs).exported_program
        edge_manager = EdgeProgramManager(
            edge_programs={"forward": exported},
            compile_config=EdgeCompileConfig(_use_edge_ops=False),
        )
        delegated = edge_manager.to_backend(QnnPartitioner(specs))
        graph_module = delegated.exported_program().graph_module
        lowered = graph_module._modules["lowered_module_0"]

        qnn_manager = PyQnnManagerAdaptor.QnnManager(lowered.compile_specs[0].value)
        qnn_manager.Init()
        check(qnn_manager.StripProtocol(lowered.processed_bytes))
2625+
2626+
def test_qnn_backend_dump_context_from_pte(self):
    """Write the lowered QDQ version of SimpleModel to a .pte file, dump its
    delegate payload with dump_context_from_pte and validate the dumped file.
    """
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    module = SimpleModel()  # noqa: F405
    sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    module = self.get_qdq_module(module, sample_input)
    # The module is quantized, so fp16 is disabled here, matching the
    # quantized context-extraction test.
    backend_options = generate_htp_compiler_spec(use_fp16=False)
    compiler_specs = [
        self.compiler_specs,
        generate_qnn_executorch_compiler_spec(
            soc_model=self.chipset_table[TestQNN.model],
            backend_options=backend_options,
            online_prepare=True,
        ),
    ]
    # validators[i] checks the payload produced with compiler_specs[i].
    validators = [validate_context_binary, validate_qcir]

    for compiler_spec, validate in zip(compiler_specs, validators):
        edge_prog_mgr = (
            EdgeProgramManager(
                edge_programs={
                    "forward": capture_program(
                        module, sample_input
                    ).exported_program
                },
                compile_config=EdgeCompileConfig(_use_edge_ops=False),
            )
            .to_backend(QnnPartitioner(compiler_spec))
            .to_executorch()
        )

        with tempfile.TemporaryDirectory() as tmp_dir:
            pte_path = f"{tmp_dir}/model.pte"
            with open(pte_path, "wb") as f:
                edge_prog_mgr.write_to_file(f)

            dump_context_from_pte(pte_path)
            # The test expects the dump to be named "forward_0.bin".
            binary_name = f"{tmp_dir}/forward_0.bin"
            self.assertTrue(os.path.isfile(binary_name))
            with open(binary_name, "rb") as f:
                stripped_binary = f.read()
            validate(stripped_binary)
2668+
25132669
def test_qnn_backend_draw_graph(self):
25142670
golden_data = """digraph test {
25152671
rankdir=TB

backends/qualcomm/tests/utils.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,57 @@ def generate_context_binary(
108108
assert os.path.isfile(f"{artifact_dir}/model_ctx.bin"), print(result.stderr)
109109

110110

111+
def validate_context_binary(ctx_bin: bytes):
    """
    Check that `ctx_bin` is accepted by `qnn-context-binary-utility`.

    The bytes are written to a temporary directory and handed to the
    x86_64-linux-clang build of the tool found under $QNN_SDK_ROOT. The check
    passes when the tool produces the requested JSON file.

    Args:
        ctx_bin (bytes): Content to validate as a context binary.

    Raises:
        AssertionError: If QNN_SDK_ROOT is not set, or if the JSON file was
            not produced (the message carries the tool's exit code and stderr).
    """
    import shlex

    qnn_sdk = os.environ.get("QNN_SDK_ROOT", None)
    assert qnn_sdk, "QNN_SDK_ROOT was not found in environment variable"

    # flow of qnn tools
    with tempfile.TemporaryDirectory() as tmp_dir:
        with open(f"{tmp_dir}/ctx.bin", "wb") as binary_file:
            binary_file.write(ctx_bin)

        target = "x86_64-linux-clang"
        cmds = [
            # qnn-context-binary-utility
            f"{qnn_sdk}/bin/{target}/qnn-context-binary-utility",
            "--context_binary",
            f"{tmp_dir}/ctx.bin",
            "--json_file",
            f"{tmp_dir}/ctx.json",
        ]
        result = subprocess.run(
            # quote every argument so paths containing spaces survive the shell
            " ".join(shlex.quote(cmd) for cmd in cmds),
            shell=True,
            executable="/bin/bash",
            capture_output=True,
        )
        stderr = result.stderr.decode(errors="replace")
        assert os.path.isfile(f"{tmp_dir}/ctx.json"), (
            "qnn-context-binary-utility did not produce ctx.json "
            f"(exit code {result.returncode}): {stderr}"
        )
137+
138+
def validate_qcir(qcir: bytes):
    """
    Check that `qcir` can be converted to JSON by `flatc` using qcir.fbs.

    The bytes are written to a temporary directory and `flatc` (looked up via
    PATH) is run on them with the schema located at ../aot/ir/qcir.fbs
    relative to this file. The check passes when qcir.json is produced.

    Args:
        qcir (bytes): Content to validate as a QCIR flatbuffer.

    Raises:
        AssertionError: If qcir.json was not produced (the message carries
            flatc's exit code, stdout and stderr).
    """
    import shlex

    with tempfile.TemporaryDirectory() as tmp_dir:
        with open(f"{tmp_dir}/qcir.bin", "wb") as binary_file:
            binary_file.write(qcir)

        cmds = [
            "flatc",
            "-o",
            tmp_dir,
            "--raw-binary",
            "-t",
            f"{os.path.dirname(__file__)}/../aot/ir/qcir.fbs",
            "--",
            f"{tmp_dir}/qcir.bin",
        ]
        result = subprocess.run(
            # quote every argument so paths containing spaces survive the shell
            " ".join(shlex.quote(cmd) for cmd in cmds),
            shell=True,
            executable="/bin/bash",
            capture_output=True,
        )
        stdout = result.stdout.decode(errors="replace")
        stderr = result.stderr.decode(errors="replace")
        assert os.path.isfile(f"{tmp_dir}/qcir.json"), (
            "flatc did not produce qcir.json "
            f"(exit code {result.returncode}): {stdout}{stderr}"
        )
160+
161+
111162
class TestQNN(unittest.TestCase):
112163
rtol: float = 0
113164
atol: float = 0

backends/qualcomm/utils/utils.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,44 @@ def replace_linear(module: torch.nn.Module):
213213
return replace_linear(module)
214214

215215

216+
def dump_context_from_pte(pte_path):
    """
    Dump compiled binaries into the directory that contains pte_path.

    Every delegate whose id is "QnnBackend" has its processed data passed
    through StripProtocol and written to f"{graph_name}_{index}.bin", where
    'graph_name' is the name of the execution plan and 'index' is the position
    of the delegate in that plan's delegate list. Delegates of other backends
    are skipped but still consume an index, so a partitioned graph may produce
    several files with non-contiguous indices.

    Args:
        pte_path (str): The path of generated pte.
    """
    import os

    from executorch.exir._serialize._program import deserialize_pte_binary

    with open(pte_path, "rb") as f:
        program_data = f.read()

    program = deserialize_pte_binary(program_data)

    # Resolve to an absolute directory first: for a bare file name
    # os.path.dirname() returns "", which would send the dumps to "/".
    ctx_path = os.path.dirname(os.path.abspath(pte_path))
    # A manager instance is only needed to reach StripProtocol, so it is
    # created from placeholder compiler specs.
    dummy_compiler_specs = generate_qnn_executorch_compiler_spec(
        soc_model=QcomChipset.SM8650,
        backend_options=generate_htp_compiler_spec(use_fp16=False),
    )
    qnn_mgr = PyQnnManagerAdaptor.QnnManager(
        generate_qnn_executorch_option(dummy_compiler_specs)
    )
    qnn_mgr.Init()
    for execution_plan in program.execution_plan:
        for i, delegate in enumerate(execution_plan.delegates):
            if delegate.id == "QnnBackend":
                processed_bytes = program.backend_delegate_data[
                    delegate.processed.index
                ].data
                binary = qnn_mgr.StripProtocol(processed_bytes)
                dump_path = os.path.join(ctx_path, f"{execution_plan.name}_{i}.bin")
                with open(dump_path, "wb") as f:
                    f.write(binary)
253+
216254
def update_spill_fill_size(
217255
exported_program: ExportedProgram | List[LoweredBackendModule],
218256
):

0 commit comments

Comments
 (0)