Skip to content

Qualcomm AI Engine Direct - context dump utility #7931

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
.def("GetSpillFillBufferSize", &PyQnnManager::GetSpillFillBufferSize)
.def(
"MakeBinaryInfo",
py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo));
py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo))
.def("StripProtocol", &PyQnnManager::StripProtocol);
}
} // namespace qnn
} // namespace backends
Expand Down
35 changes: 35 additions & 0 deletions backends/qualcomm/aot/python/PyQnnManagerAdaptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,41 @@ class PyQnnManager {
return result;
}

// Strip the custom-protocol wrapper from a preprocessed blob and hand the
// inner payload back to Python as a char array.
// Tries, in order: QNN context binary, qcir flatbuffer. If neither protocol
// matches, the payload is assumed to be DLC and an empty array is returned.
py::array_t<char> StripProtocol(const py::bytes& preprocessed_binary) {
  py::buffer_info info(py::buffer(preprocessed_binary).request());

  // First interpretation: a QNN context binary wrapped in the custom protocol.
  auto [ctx_status, ctx_signature, ctx_size, ctx_bin] =
      QnnContextCustomProtocol().DeserializeContextCustomBuffer(info.ptr);
  if (ctx_status == Error::Ok) {
    auto context_result = py::array_t<char>(ctx_size);
    std::memcpy(context_result.request().ptr, ctx_bin, ctx_size);
    return context_result;
  }

  // Second interpretation: qcir flatbuffer. On a match only the fbs section
  // is returned; the tensor section is intentionally dropped.
  auto [qcir_status, qcir_fbs_size, qcir_tensor_size, qcir_fbs_ptr, qcir_tensor_ptr] =
      QnnQcirCustomProtocol().DeserializeQcirCustomBuffer(info.ptr);
  if (qcir_status == Error::Ok) {
    auto qcir_result = py::array_t<char>(qcir_fbs_size);
    std::memcpy(qcir_result.request().ptr, qcir_fbs_ptr, qcir_fbs_size);
    return qcir_result;
  }

  // Neither protocol matched — the format should be DLC, return nothing here.
  return py::array_t<char>(0);
}

private:
// Store the bytes object instead of a raw pointer so that this module will
// keep the bytes alive.
Expand Down
158 changes: 157 additions & 1 deletion backends/qualcomm/tests/test_qnn_delegate.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
QuantDtype,
TestQNN,
to_backend,
validate_context_binary,
validate_qcir,
)
from executorch.backends.qualcomm.utils.constants import (
QCOM_ANNOTATION,
Expand All @@ -30,10 +32,12 @@

from executorch.backends.qualcomm.utils.utils import (
capture_program,
dump_context_from_pte,
from_context_binary,
generate_htp_compiler_spec,
generate_multi_graph_program,
generate_qnn_executorch_compiler_spec,
PyQnnManagerAdaptor,
skip_annotation,
update_spill_fill_size,
)
Expand Down Expand Up @@ -2041,6 +2045,81 @@ def test_qnn_backend_context_direct(self):
bundle_program["edge_program_manager"].to_executorch(),
)

def test_qnn_backend_context_extraction(self):
    """Lower a simple fp16 model with each compiler spec and verify that
    StripProtocol recovers a validatable payload from the lowered module."""
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    module = SimpleModel()  # noqa: F405
    sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    backend_options = generate_htp_compiler_spec(use_fp16=True)
    # (spec, validator) pairs: default spec yields a context binary,
    # online_prepare yields a qcir flatbuffer.
    specs_and_validators = [
        (self.compiler_specs, validate_context_binary),
        (
            generate_qnn_executorch_compiler_spec(
                soc_model=self.chipset_table[TestQNN.model],
                backend_options=backend_options,
                online_prepare=True,
            ),
            validate_qcir,
        ),
    ]

    for compiler_spec, validate in specs_and_validators:
        exported = capture_program(module, sample_input).exported_program
        edge_prog_mgr = EdgeProgramManager(
            edge_programs={"forward": exported},
            compile_config=EdgeCompileConfig(_use_edge_ops=False),
        ).to_backend(QnnPartitioner(compiler_spec))
        lowered = edge_prog_mgr.exported_program().graph_module._modules[
            "lowered_module_0"
        ]
        qnn_mgr = PyQnnManagerAdaptor.QnnManager(lowered.compile_specs[0].value)
        qnn_mgr.Init()
        validate(qnn_mgr.StripProtocol(lowered.processed_bytes))

def test_qnn_backend_dump_context_from_pte(self):
    """Round-trip check: export to .pte, extract the delegate context with
    dump_context_from_pte, and validate the dumped binary."""
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    module = SimpleModel()  # noqa: F405
    sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    backend_options = generate_htp_compiler_spec(use_fp16=True)
    # (spec, validator) pairs: default spec yields a context binary,
    # online_prepare yields a qcir flatbuffer.
    specs_and_validators = [
        (self.compiler_specs, validate_context_binary),
        (
            generate_qnn_executorch_compiler_spec(
                soc_model=self.chipset_table[TestQNN.model],
                backend_options=backend_options,
                online_prepare=True,
            ),
            validate_qcir,
        ),
    ]

    for compiler_spec, validate in specs_and_validators:
        exported = capture_program(module, sample_input).exported_program
        edge_prog_mgr = (
            EdgeProgramManager(
                edge_programs={"forward": exported},
                compile_config=EdgeCompileConfig(_use_edge_ops=False),
            )
            .to_backend(QnnPartitioner(compiler_spec))
            .to_executorch()
        )

        with tempfile.TemporaryDirectory() as tmp_dir:
            pte_path = f"{tmp_dir}/model.pte"
            with open(pte_path, "wb") as f:
                edge_prog_mgr.write_to_file(f)

            dump_context_from_pte(pte_path)
            # dump_context_from_pte names outputs f"{graph_name}_{index}.bin".
            dumped_path = f"{tmp_dir}/forward_0.bin"
            self.assertTrue(os.path.isfile(dumped_path))
            with open(dumped_path, "rb") as f:
                validate(f.read())

def test_qnn_backend_draw_graph(self):
golden_data = """digraph test {
rankdir=TB
Expand Down Expand Up @@ -2433,7 +2512,7 @@ def test_qnn_backend_multi_graphs(self):
for module, sample_input in zip(modules, sample_inputs)
]
backend_options = generate_htp_compiler_spec(
use_fp16=True,
use_fp16=False,
)
compiler_specs = [
generate_qnn_executorch_compiler_spec(
Expand Down Expand Up @@ -2532,6 +2611,83 @@ def test_qnn_backend_context_direct(self):
bundle_program["edge_program_manager"].to_executorch(),
)

def test_qnn_backend_context_extraction(self):
    """Lower a quantized (qdq) model with each compiler spec and verify that
    StripProtocol recovers a validatable payload from the lowered module."""
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    module = SimpleModel()  # noqa: F405
    sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    module = self.get_qdq_module(module, sample_input)
    backend_options = generate_htp_compiler_spec(use_fp16=False)
    # (spec, validator) pairs: default spec yields a context binary,
    # online_prepare yields a qcir flatbuffer.
    specs_and_validators = [
        (self.compiler_specs, validate_context_binary),
        (
            generate_qnn_executorch_compiler_spec(
                soc_model=self.chipset_table[TestQNN.model],
                backend_options=backend_options,
                online_prepare=True,
            ),
            validate_qcir,
        ),
    ]

    for compiler_spec, validate in specs_and_validators:
        exported = capture_program(module, sample_input).exported_program
        edge_prog_mgr = EdgeProgramManager(
            edge_programs={"forward": exported},
            compile_config=EdgeCompileConfig(_use_edge_ops=False),
        ).to_backend(QnnPartitioner(compiler_spec))
        lowered = edge_prog_mgr.exported_program().graph_module._modules[
            "lowered_module_0"
        ]
        qnn_mgr = PyQnnManagerAdaptor.QnnManager(lowered.compile_specs[0].value)
        qnn_mgr.Init()
        validate(qnn_mgr.StripProtocol(lowered.processed_bytes))

def test_qnn_backend_dump_context_from_pte(self):
    """Round-trip check for a quantized (qdq) model: export to .pte, extract
    the delegate context with dump_context_from_pte, and validate it."""
    from executorch.exir import EdgeCompileConfig, EdgeProgramManager

    module = SimpleModel()  # noqa: F405
    sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
    module = self.get_qdq_module(module, sample_input)
    # NOTE(review): use_fp16=True alongside a qdq module — the sibling
    # extraction test uses use_fp16=False; confirm this is intentional.
    backend_options = generate_htp_compiler_spec(use_fp16=True)
    # (spec, validator) pairs: default spec yields a context binary,
    # online_prepare yields a qcir flatbuffer.
    specs_and_validators = [
        (self.compiler_specs, validate_context_binary),
        (
            generate_qnn_executorch_compiler_spec(
                soc_model=self.chipset_table[TestQNN.model],
                backend_options=backend_options,
                online_prepare=True,
            ),
            validate_qcir,
        ),
    ]

    for compiler_spec, validate in specs_and_validators:
        exported = capture_program(module, sample_input).exported_program
        edge_prog_mgr = (
            EdgeProgramManager(
                edge_programs={"forward": exported},
                compile_config=EdgeCompileConfig(_use_edge_ops=False),
            )
            .to_backend(QnnPartitioner(compiler_spec))
            .to_executorch()
        )

        with tempfile.TemporaryDirectory() as tmp_dir:
            pte_path = f"{tmp_dir}/model.pte"
            with open(pte_path, "wb") as f:
                edge_prog_mgr.write_to_file(f)

            dump_context_from_pte(pte_path)
            # dump_context_from_pte names outputs f"{graph_name}_{index}.bin".
            dumped_path = f"{tmp_dir}/forward_0.bin"
            self.assertTrue(os.path.isfile(dumped_path))
            with open(dumped_path, "rb") as f:
                validate(f.read())

def test_qnn_backend_draw_graph(self):
golden_data = """digraph test {
rankdir=TB
Expand Down
51 changes: 51 additions & 0 deletions backends/qualcomm/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,57 @@ def generate_context_binary(
assert os.path.isfile(f"{artifact_dir}/model_ctx.bin"), print(result.stderr)


def validate_context_binary(ctx_bin: bytes):
    """Validate a QNN context binary with the SDK's qnn-context-binary-utility.

    The tool is asked to decode the binary into json; the json file existing
    afterwards is taken as proof the binary is well-formed.

    Args:
        ctx_bin: Raw QNN context binary bytes (e.g. output of StripProtocol).

    Raises:
        AssertionError: if QNN_SDK_ROOT is unset or the tool fails to
            produce the json output (message carries the tool's stderr).
    """
    qnn_sdk = os.environ.get("QNN_SDK_ROOT", None)
    assert qnn_sdk, "QNN_SDK_ROOT was not found in environment variable"

    # flow of qnn tools
    with tempfile.TemporaryDirectory() as tmp_dir:
        ctx_path = f"{tmp_dir}/ctx.bin"
        with open(ctx_path, "wb") as binary_file:
            binary_file.write(ctx_bin)

        target = "x86_64-linux-clang"
        cmds = [
            # qnn-context-binary-utility
            f"{qnn_sdk}/bin/{target}/qnn-context-binary-utility",
            "--context_binary",
            ctx_path,
            "--json_file",
            f"{tmp_dir}/ctx.json",
        ]
        # Pass argv as a list (no shell): robust against spaces in tmp paths
        # and avoids an unnecessary shell invocation.
        result = subprocess.run(cmds, capture_output=True, text=True)
        # Use stderr as the assertion message so failures are diagnosable;
        # `assert cond, print(...)` would leave the message as None.
        assert os.path.isfile(f"{tmp_dir}/ctx.json"), result.stderr


def validate_qcir(qcir: bytes):
    """Validate a qcir flatbuffer payload by decoding it with `flatc`.

    flatc is run with `-t` against the project's qcir schema; the decoded
    json existing afterwards is taken as proof the buffer is well-formed.

    Args:
        qcir: Raw qcir flatbuffer bytes (e.g. output of StripProtocol).

    Raises:
        AssertionError: if flatc fails to produce the decoded json
            (message carries the tool's stderr).
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        bin_path = f"{tmp_dir}/qcir.bin"
        with open(bin_path, "wb") as binary_file:
            binary_file.write(qcir)

        cmds = [
            "flatc",
            "-o",
            tmp_dir,
            "--raw-binary",
            "-t",
            f"{os.path.dirname(__file__)}/../aot/ir/qcir.fbs",
            "--",
            bin_path,
        ]
        # Pass argv as a list (no shell): robust against spaces in tmp paths
        # and avoids an unnecessary shell invocation.
        result = subprocess.run(cmds, capture_output=True, text=True)
        # flatc writes <input basename>.json into the -o directory on success.
        # Use stderr as the assertion message so failures are diagnosable;
        # `assert cond, print(...)` would leave the message as None.
        assert os.path.isfile(f"{tmp_dir}/qcir.json"), result.stderr


class TestQNN(unittest.TestCase):
rtol: float = 0
atol: float = 0
Expand Down
38 changes: 38 additions & 0 deletions backends/qualcomm/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,44 @@ def replace_linear(module: torch.nn.Module):
return replace_linear(module)


def dump_context_from_pte(pte_path):
    """
    Dump compiled binaries under the same directory of pte_path.
    For partitioned graph, there will be multiple files with names f"{graph_name}_{index}".
    Where 'graph_name' comes from the compiler_specs and 'index' represents the execution order.

    Args:
        pte_path (str): The path of generated pte.
    """
    import os

    from executorch.exir._serialize._program import deserialize_pte_binary

    with open(pte_path, "rb") as f:
        program = deserialize_pte_binary(f.read())

    out_dir = os.path.dirname(pte_path)
    # The manager only needs to exist to call StripProtocol; any valid
    # compiler spec works, hence the dummy SM8650 / fp16-off configuration.
    dummy_compiler_specs = generate_qnn_executorch_compiler_spec(
        soc_model=QcomChipset.SM8650,
        backend_options=generate_htp_compiler_spec(use_fp16=False),
    )
    qnn_mgr = PyQnnManagerAdaptor.QnnManager(
        generate_qnn_executorch_option(dummy_compiler_specs)
    )
    qnn_mgr.Init()
    for execution_plan in program.execution_plan:
        for index, delegate in enumerate(execution_plan.delegates):
            if delegate.id != "QnnBackend":
                continue
            payload = program.backend_delegate_data[delegate.processed.index].data
            # One file per delegate, named {graph_name}_{execution order}.bin
            with open(f"{out_dir}/{execution_plan.name}_{index}.bin", "wb") as f:
                f.write(qnn_mgr.StripProtocol(payload))


def update_spill_fill_size(
exported_program: ExportedProgram | List[LoweredBackendModule],
):
Expand Down
Loading