Skip to content

Commit d3d3d78

Browse files
committed
Qualcomm AI Engine Direct - context dump utility
summary: - utility for dumping compiled binaries (QNN context_binary / QCIR) - test cases
1 parent f73b8cf commit d3d3d78

File tree

5 files changed

+285
-2
lines changed

5 files changed

+285
-2
lines changed

backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
4949
.def("GetSpillFillBufferSize", &PyQnnManager::GetSpillFillBufferSize)
5050
.def(
5151
"MakeBinaryInfo",
52-
py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo));
52+
py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo))
53+
.def("StripProtocol", &PyQnnManager::StripProtocol);
5354
}
5455
} // namespace qnn
5556
} // namespace backends

backends/qualcomm/aot/python/PyQnnManagerAdaptor.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,43 @@ class PyQnnManager {
390390
return result;
391391
}
392392

393+
// Removes the custom-protocol wrapper from a preprocessed QNN payload and
// returns the wrapped binary as a char array.
//
// The buffer is first parsed as a QNN context binary. If that fails it is
// parsed as a QCIR payload, in which case only the flatbuffer section is
// returned. An empty array is returned when the input is empty or when
// neither parser accepts it (the payload is then expected to be DLC).
py::array_t<char> StripProtocol(const py::bytes& preprocessed_binary) {
  py::buffer_info info(py::buffer(preprocessed_binary).request());
  // An empty payload cannot carry a protocol header, so do not hand it to the
  // deserializers below.
  if (info.ptr == nullptr || info.size == 0) {
    return py::array_t<char>(0);
  }

  void* buf_ptr = nullptr;
  size_t buf_size = 0;
  // check if it's a qnn context binary
  auto [ctx_status, signature, ctx_size, ctx_bin] =
      QnnContextCustomProtocol().DeserializeContextCustomBuffer(info.ptr);

  if (ctx_status == Error::Ok) {
    buf_size = ctx_size;
    buf_ptr = ctx_bin;
  } else {
    // check if it's a qcir flatbuffers, return fbs if matched
    auto
        [qcir_status,
         qcir_fbs_size,
         qcir_tensor_size,
         qcir_fbs_ptr,
         qcir_tensor_ptr] =
            QnnQcirCustomProtocol().DeserializeQcirCustomBuffer(info.ptr);
    if (qcir_status != Error::Ok) {
      // the format should be DLC, return nothing here
      return py::array_t<char>(0);
    }
    buf_size = qcir_fbs_size;
    buf_ptr = qcir_fbs_ptr;
  }

  auto result = py::array_t<char>(buf_size);
  // memcpy requires valid pointers even for a zero-length copy, so skip it
  // when there is nothing to copy.
  if (buf_size > 0) {
    auto result_buffer = result.request();
    std::memcpy(result_buffer.ptr, buf_ptr, buf_size);
  }
  return result;
}
429+
393430
private:
394431
// Store the bytes object instead of a raw pointer so that this module will
395432
// keep the bytes alive.

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 157 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
QuantDtype,
2121
TestQNN,
2222
to_backend,
23+
validate_context_binary,
24+
validate_qcir,
2325
)
2426
from executorch.backends.qualcomm.utils.constants import (
2527
QCOM_ANNOTATION,
@@ -30,10 +32,12 @@
3032

3133
from executorch.backends.qualcomm.utils.utils import (
3234
capture_program,
35+
dump_context_from_pte,
3336
from_context_binary,
3437
generate_htp_compiler_spec,
3538
generate_multi_graph_program,
3639
generate_qnn_executorch_compiler_spec,
40+
PyQnnManagerAdaptor,
3741
skip_annotation,
3842
update_spill_fill_size,
3943
)
@@ -2003,6 +2007,81 @@ def test_qnn_backend_context_direct(self):
20032007
bundle_program["edge_program_manager"].to_executorch(),
20042008
)
20052009

2010+
def test_qnn_backend_context_extraction(self):
    """Lower SimpleModel per compiler spec and validate the stripped payload.

    The suite's default compiler spec is paired with
    validate_context_binary; a spec generated with online_prepare=True is
    paired with validate_qcir.
    """
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    model = SimpleModel()  # noqa: F405
    inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    htp_options = generate_htp_compiler_spec(use_fp16=True)
    cases = [
        (self.compiler_specs, validate_context_binary),
        (
            generate_qnn_executorch_compiler_spec(
                soc_model=self.chipset_table[TestQNN.model],
                backend_options=htp_options,
                online_prepare=True,
            ),
            validate_qcir,
        ),
    ]

    for spec, check in cases:
        # Re-capture the program on every iteration, one capture per spec.
        exported = capture_program(model, inputs).exported_program
        edge_manager = EdgeProgramManager(
            edge_programs={"forward": exported},
            compile_config=EdgeCompileConfig(_use_edge_ops=False),
        )
        delegated = edge_manager.to_backend(QnnPartitioner(spec))
        graph_module = delegated.exported_program().graph_module
        lowered = graph_module._modules["lowered_module_0"]

        manager = PyQnnManagerAdaptor.QnnManager(lowered.compile_specs[0].value)
        manager.Init()
        check(manager.StripProtocol(lowered.processed_bytes))
2043+
def test_qnn_backend_dump_context_from_pte(self):
    """Write a .pte per compiler spec, dump its context, validate the dump.

    The suite's default compiler spec is paired with
    validate_context_binary; a spec generated with online_prepare=True is
    paired with validate_qcir. The dump is expected at "forward_0.bin" next
    to the .pte file.
    """
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    model = SimpleModel()  # noqa: F405
    inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    htp_options = generate_htp_compiler_spec(use_fp16=True)
    cases = [
        (self.compiler_specs, validate_context_binary),
        (
            generate_qnn_executorch_compiler_spec(
                soc_model=self.chipset_table[TestQNN.model],
                backend_options=htp_options,
                online_prepare=True,
            ),
            validate_qcir,
        ),
    ]

    for spec, check in cases:
        exported = capture_program(model, inputs).exported_program
        edge_manager = EdgeProgramManager(
            edge_programs={"forward": exported},
            compile_config=EdgeCompileConfig(_use_edge_ops=False),
        )
        exec_manager = edge_manager.to_backend(QnnPartitioner(spec)).to_executorch()

        with tempfile.TemporaryDirectory() as work_dir:
            pte_file = f"{work_dir}/model.pte"
            with open(pte_file, "wb") as out:
                exec_manager.write_to_file(out)

            dump_context_from_pte(pte_file)

            dumped_file = f"{work_dir}/forward_0.bin"
            self.assertTrue(os.path.isfile(dumped_file))
            with open(dumped_file, "rb") as dumped:
                payload = dumped.read()
            check(payload)
2084+
20062085
def test_qnn_backend_draw_graph(self):
20072086
golden_data = """digraph test {
20082087
rankdir=TB
@@ -2395,7 +2474,7 @@ def test_qnn_backend_multi_graphs(self):
23952474
for module, sample_input in zip(modules, sample_inputs)
23962475
]
23972476
backend_options = generate_htp_compiler_spec(
2398-
use_fp16=True,
2477+
use_fp16=False,
23992478
)
24002479
compiler_specs = [
24012480
generate_qnn_executorch_compiler_spec(
@@ -2494,6 +2573,83 @@ def test_qnn_backend_context_direct(self):
24942573
bundle_program["edge_program_manager"].to_executorch(),
24952574
)
24962575

2576+
def test_qnn_backend_context_extraction(self):
    """Lower a QDQ SimpleModel per compiler spec and validate the stripped payload.

    The suite's default compiler spec is paired with
    validate_context_binary; a spec generated with online_prepare=True is
    paired with validate_qcir.
    """
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    inputs = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    qdq_model = self.get_qdq_module(SimpleModel(), inputs)  # noqa: F405
    htp_options = generate_htp_compiler_spec(use_fp16=False)
    cases = [
        (self.compiler_specs, validate_context_binary),
        (
            generate_qnn_executorch_compiler_spec(
                soc_model=self.chipset_table[TestQNN.model],
                backend_options=htp_options,
                online_prepare=True,
            ),
            validate_qcir,
        ),
    ]

    for spec, check in cases:
        # Re-capture the program on every iteration, one capture per spec.
        exported = capture_program(qdq_model, inputs).exported_program
        edge_manager = EdgeProgramManager(
            edge_programs={"forward": exported},
            compile_config=EdgeCompileConfig(_use_edge_ops=False),
        )
        delegated = edge_manager.to_backend(QnnPartitioner(spec))
        graph_module = delegated.exported_program().graph_module
        lowered = graph_module._modules["lowered_module_0"]

        manager = PyQnnManagerAdaptor.QnnManager(lowered.compile_specs[0].value)
        manager.Init()
        check(manager.StripProtocol(lowered.processed_bytes))
2609+
2610+
def test_qnn_backend_dump_context_from_pte(self):
    """Write a .pte per compiler spec for a QDQ model, dump and validate it.

    The suite's default compiler spec is paired with
    validate_context_binary; a spec generated with online_prepare=True is
    paired with validate_qcir. The dump is expected at "forward_0.bin" next
    to the .pte file.
    """
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    module = SimpleModel()  # noqa: F405
    sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    module = self.get_qdq_module(module, sample_input)
    # The module is quantized (QDQ), so use the same non-fp16 HTP options as
    # the quantized context-extraction test.
    backend_options = generate_htp_compiler_spec(use_fp16=False)
    compiler_specs = [
        self.compiler_specs,
        generate_qnn_executorch_compiler_spec(
            soc_model=self.chipset_table[TestQNN.model],
            backend_options=backend_options,
            online_prepare=True,
        ),
    ]
    validators = [validate_context_binary, validate_qcir]

    for compiler_spec, validate in zip(compiler_specs, validators):
        exec_prog_mgr = (
            EdgeProgramManager(
                edge_programs={
                    "forward": capture_program(
                        module, sample_input
                    ).exported_program
                },
                compile_config=EdgeCompileConfig(_use_edge_ops=False),
            )
            .to_backend(QnnPartitioner(compiler_spec))
            .to_executorch()
        )

        with tempfile.TemporaryDirectory() as tmp_dir:
            pte_path = f"{tmp_dir}/model.pte"
            with open(pte_path, "wb") as f:
                exec_prog_mgr.write_to_file(f)

            dump_context_from_pte(pte_path)
            binary_name = f"{tmp_dir}/forward_0.bin"
            self.assertTrue(os.path.isfile(binary_name))
            with open(binary_name, "rb") as f:
                stripped_binary = f.read()
            validate(stripped_binary)
2652+
24972653
def test_qnn_backend_draw_graph(self):
24982654
golden_data = """digraph test {
24992655
rankdir=TB

backends/qualcomm/tests/utils.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,57 @@ def generate_context_binary(
108108
assert os.path.isfile(f"{artifact_dir}/model_ctx.bin"), print(result.stderr)
109109

110110

111+
def validate_context_binary(ctx_bin: bytes):
    """Check a QNN context binary by running qnn-context-binary-utility on it.

    The payload is written to a temporary directory and the SDK tool found
    under $QNN_SDK_ROOT is asked to emit a JSON description of it. The check
    passes when that JSON file is produced.

    Args:
        ctx_bin: Raw context binary to validate.

    Raises:
        AssertionError: If QNN_SDK_ROOT is unset/empty, or if the tool did not
            produce the JSON file (the message carries the tool's stderr).
    """
    import shlex

    qnn_sdk = os.environ.get("QNN_SDK_ROOT", None)
    assert qnn_sdk, "QNN_SDK_ROOT was not found in environment variable"

    # flow of qnn tools
    with tempfile.TemporaryDirectory() as tmp_dir:
        with open(f"{tmp_dir}/ctx.bin", "wb") as binary_file:
            binary_file.write(ctx_bin)

        target = "x86_64-linux-clang"
        cmds = [
            # qnn-context-binary-utility
            f"{qnn_sdk}/bin/{target}/qnn-context-binary-utility",
            "--context_binary",
            f"{tmp_dir}/ctx.bin",
            "--json_file",
            f"{tmp_dir}/ctx.json",
        ]
        result = subprocess.run(
            # Quote every argument so paths containing spaces or shell
            # metacharacters survive the trip through bash.
            shlex.join(cmds),
            shell=True,
            executable="/bin/bash",
            capture_output=True,
        )
        # Use stderr as the assertion message itself; print() would return
        # None and leave the failure without a message.
        assert os.path.isfile(f"{tmp_dir}/ctx.json"), result.stderr.decode(
            errors="replace"
        )
136+
137+
138+
def validate_qcir(qcir: bytes):
    """Check a QCIR flatbuffer by running flatc on it with the qcir.fbs schema.

    The payload is written to a temporary directory and `flatc` (looked up on
    PATH) is invoked against ../aot/ir/qcir.fbs relative to this file. The
    check passes when qcir.json is produced in the temporary directory.

    Args:
        qcir: Raw QCIR flatbuffer to validate.

    Raises:
        AssertionError: If flatc did not produce qcir.json (the message
            carries flatc's stderr).
    """
    import shlex

    with tempfile.TemporaryDirectory() as tmp_dir:
        with open(f"{tmp_dir}/qcir.bin", "wb") as binary_file:
            binary_file.write(qcir)

        cmds = [
            "flatc",
            "-o",
            tmp_dir,
            "--raw-binary",
            "-t",
            f"{os.path.dirname(__file__)}/../aot/ir/qcir.fbs",
            "--",
            f"{tmp_dir}/qcir.bin",
        ]
        result = subprocess.run(
            # Quote every argument so paths containing spaces or shell
            # metacharacters survive the trip through bash.
            shlex.join(cmds),
            shell=True,
            executable="/bin/bash",
            capture_output=True,
        )
        # Use stderr as the assertion message itself; print() would return
        # None and leave the failure without a message.
        assert os.path.isfile(f"{tmp_dir}/qcir.json"), result.stderr.decode(
            errors="replace"
        )
160+
161+
111162
class TestQNN(unittest.TestCase):
112163
rtol: float = 0
113164
atol: float = 0

backends/qualcomm/utils/utils.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,44 @@ def replace_linear(module: torch.nn.Module):
213213
return replace_linear(module)
214214

215215

216+
def dump_context_from_pte(pte_path):
    """
    Dump compiled binaries under the same directory of pte_path.
    For partitioned graph, there will be multiple files with names f"{graph_name}_{index}.bin".
    Where 'graph_name' is the name of the execution plan and 'index' is the
    position of the delegate within that execution plan.

    Only delegates whose id is "QnnBackend" are dumped.

    Args:
        pte_path (str): The path of generated pte.
    """
    import os

    from executorch.exir._serialize._program import deserialize_pte_binary

    with open(pte_path, "rb") as f:
        program_data = f.read()

    program = deserialize_pte_binary(program_data)

    # Resolve to an absolute directory first: for a bare filename
    # os.path.dirname() returns "", which would otherwise turn the output
    # path into "/<name>_<i>.bin" at the filesystem root.
    ctx_path = os.path.dirname(os.path.abspath(pte_path))
    # NOTE(review): these specs only serve to construct a QnnManager so that
    # StripProtocol can be called; presumably the chosen SoC does not affect
    # the stripped output - confirm.
    dummy_compiler_specs = generate_qnn_executorch_compiler_spec(
        soc_model=QcomChipset.SM8650,
        backend_options=generate_htp_compiler_spec(use_fp16=False),
    )
    qnn_mgr = PyQnnManagerAdaptor.QnnManager(
        generate_qnn_executorch_option(dummy_compiler_specs)
    )
    qnn_mgr.Init()
    for execution_plan in program.execution_plan:
        for i, delegate in enumerate(execution_plan.delegates):
            if delegate.id == "QnnBackend":
                processed_bytes = program.backend_delegate_data[
                    delegate.processed.index
                ].data
                binary = qnn_mgr.StripProtocol(processed_bytes)
                output_path = os.path.join(
                    ctx_path, f"{execution_plan.name}_{i}.bin"
                )
                with open(output_path, "wb") as f:
                    f.write(binary)
252+
253+
216254
def update_spill_fill_size(
217255
exported_program: ExportedProgram | List[LoweredBackendModule],
218256
):

0 commit comments

Comments
 (0)