Skip to content

Qualcomm AI Engine Direct - Support QNN 2.28 #6811

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .ci/scripts/build-qnn-sdk.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ set -o xtrace
build_qnn_backend() {
echo "Start building qnn backend."
export ANDROID_NDK_ROOT=/opt/ndk
export QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728
export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"

bash backends/qualcomm/scripts/build.sh --skip_aarch64 --job_number 2 --release
Expand Down
4 changes: 2 additions & 2 deletions .ci/scripts/setup-qnn-deps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ install_qnn() {
QNN_INSTALLATION_DIR=/tmp/qnn
mkdir -p "${QNN_INSTALLATION_DIR}"

curl -Lo /tmp/v2.25.0.24.07.28.zip "https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.25.0.240728.zip"
curl -Lo /tmp/v2.28.0.24.10.29.zip "https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.28.0.241029.zip"
echo "Finishing downloading qnn sdk."
unzip -qo /tmp/v2.25.0.24.07.28.zip -d /tmp
unzip -qo /tmp/v2.28.0.24.10.29.zip -d /tmp
echo "Finishing unzip qnn sdk."


Expand Down
2 changes: 1 addition & 1 deletion .ci/scripts/test_llama.sh
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ echo "COREML option ${COREML}"
if [[ "${MODE}" =~ .*qnn.* ]]; then
QNN=ON
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
export QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728
export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
export PYTHONPATH=".."
cp schema/program.fbs exir/_serialize/program.fbs
Expand Down
9 changes: 9 additions & 0 deletions backends/qualcomm/runtime/backends/QnnBackendCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ Error QnnBackendCache::GetQnnGraphInfoFromBinary(
} else if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2) {
num_graphs = binaryinfo->contextBinaryInfoV2.numGraphs;
graphs = binaryinfo->contextBinaryInfoV2.graphs;
#if (QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR >= 21)
} else if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3) {
num_graphs = binaryinfo->contextBinaryInfoV3.numGraphs;
graphs = binaryinfo->contextBinaryInfoV3.graphs;
#endif
} else {
QNN_EXECUTORCH_LOG_WARN(
"Unknown QNN BinaryInfo version %d.", binaryinfo->version);
Expand All @@ -62,6 +67,10 @@ Error QnnBackendCache::GetQnnGraphInfoFromBinary(
RetrieveGraphInfo<QnnSystemContext_GraphInfoV1_t>(graphs[i].graphInfoV1);
} else if (graphs->version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_2) {
RetrieveGraphInfo<QnnSystemContext_GraphInfoV2_t>(graphs[i].graphInfoV2);
#if (QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR >= 21)
} else if (graphs->version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3) {
RetrieveGraphInfo<QnnSystemContext_GraphInfoV3_t>(graphs[i].graphInfoV3);
#endif
} else {
QNN_EXECUTORCH_LOG_WARN(
"Unknown QNN GraphInfo version %d.", binaryinfo->version);
Expand Down
43 changes: 31 additions & 12 deletions backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,34 +17,53 @@ using executorch::runtime::Error;
Error HtpBackendCache::RetrieveBackendBinaryInfo(
const QnnSystemContext_BinaryInfo_t* binaryinfo) {
QnnHtpSystemContext_HwBlobInfo_t* htp_hwblobinfo = nullptr;
#if (QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR >= 21)
QnnHtpSystemContext_GraphBlobInfo_t* htp_graphblobinfo = nullptr;
#endif

if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_1) {
htp_hwblobinfo = static_cast<QnnHtpSystemContext_HwBlobInfo_t*>(
binaryinfo->contextBinaryInfoV1.hwInfoBlob);
} else if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2) {
htp_hwblobinfo = static_cast<QnnHtpSystemContext_HwBlobInfo_t*>(
binaryinfo->contextBinaryInfoV2.hwInfoBlob);
#if (QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR >= 21)
} else if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3) {
htp_graphblobinfo = static_cast<QnnHtpSystemContext_GraphBlobInfo_t*>(
binaryinfo->contextBinaryInfoV3.graphs->graphInfoV3.graphBlobInfo);
#endif
} else {
QNN_EXECUTORCH_LOG_WARN(
"Unknown QNN BinaryInfo version %d.", binaryinfo->version);
return Error::Internal;
}

if (htp_hwblobinfo == nullptr) {
QNN_EXECUTORCH_LOG_WARN(
"Htp hardware blob information is not found in binary information.");
return Error::Ok;
if (htp_hwblobinfo) {
if (htp_hwblobinfo->version ==
QNN_SYSTEM_CONTEXT_HTP_HW_INFO_BLOB_VERSION_V1) {
spill_fill_buf_ =
(*htp_hwblobinfo).contextBinaryHwInfoBlobV1_t.spillFillBufferSize;
} else {
QNN_EXECUTORCH_LOG_WARN(
"Unknown QNN Htp hw blob info version %d.", htp_hwblobinfo->version);
return Error::Internal;
}
}

if (htp_hwblobinfo->version ==
QNN_SYSTEM_CONTEXT_HTP_HW_INFO_BLOB_VERSION_V1) {
spill_fill_buf_ =
(*htp_hwblobinfo).contextBinaryHwInfoBlobV1_t.spillFillBufferSize;
} else {
QNN_EXECUTORCH_LOG_WARN(
"Unknown QNN Htp hw blob info version %d.", htp_hwblobinfo->version);
return Error::Internal;
#if (QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR >= 21)
if (htp_graphblobinfo) {
if (htp_graphblobinfo->version ==
QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1) {
spill_fill_buf_ =
(*htp_graphblobinfo).contextBinaryGraphBlobInfoV1.spillFillBufferSize;
} else {
QNN_EXECUTORCH_LOG_WARN(
"Unknown QNN Htp graph blob info version %d.",
htp_graphblobinfo->version);
return Error::Internal;
}
}
#endif

return Error::Ok;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ This example is verified with SM8550 and SM8450.
- Click the "Get Software" button to download a version of QNN SDK.
- However, as of the last update to this tutorial, the website above does not provide a QNN SDK newer than 2.22.6.
- Below are public links for downloading various QNN versions. We hope they will become publicly discoverable soon.
- [QNN 2.26.0](https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.26.0.240828.zip)
- [QNN 2.28.0](https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.28.0.241029.zip)

The directory with installed Qualcomm AI Engine Direct SDK looks like:
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ This tutorial demonstrates how to export Llama 3 8B Instruct for Qualcomm AI Eng
- Follow [the README for executorch llama](https://github.com/pytorch/executorch/tree/main/examples/models/llama) to know how to run a llama model on mobile via ExecuTorch.
- A Qualcomm device with 16GB RAM
- We are continuing to optimize our memory usage to ensure compatibility with lower memory devices.
- The version of [Qualcomm AI Engine Direct SDK](https://developer.qualcomm.com/software/qualcomm-ai-engine-direct-sdk) is 2.26.0 or above.
- The version of [Qualcomm AI Engine Direct SDK](https://developer.qualcomm.com/software/qualcomm-ai-engine-direct-sdk) is 2.28.0 or above.

## Instructions

Expand Down
2 changes: 1 addition & 1 deletion shim/xplat/executorch/backends/qualcomm/qnn_version.bzl
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
def get_qnn_library_verision():
return "2.26"
return "2.28"
Loading