Qualcomm AI Engine Direct - Refine max spill fill buffer setting #5989
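
This PR refines how the maximum spill-fill buffer size for HTP multi-context execution is determined: instead of approximating it with the length of each serialized context binary, the size is now read from the HTP hardware-info blob embedded in the QNN system-context binary info and exposed through `QnnManager::GetSpillFillBufferSize()` plus a matching Python binding. The sketch below shows the intended AOT-side query pattern (it mirrors the `utils.py` change further down); the standalone helper name and the import path are assumptions, and `m` is assumed to be a module lowered by the Qualcomm backend.

```python
# Minimal sketch: query the spill-fill size recorded in a module's QNN
# context binary. PyQnnManagerAdaptor is the pybind11 module defined in
# PyQnnManagerAdaptor.cpp below; the exact import path depends on the build.
import PyQnnManagerAdaptor


def query_spill_fill_size(m) -> int:
    # Load the serialized compile spec and the QNN context binary.
    qnn_mgr = PyQnnManagerAdaptor.QnnManager(
        m.compile_specs[0].value, m.processed_bytes
    )
    assert qnn_mgr.Init().value == 0, "failed to load context binary"
    # Size reported by the HTP hardware-info blob, not the length of the
    # whole context binary.
    size = qnn_mgr.GetSpillFillBufferSize()
    qnn_mgr.Destroy()
    return size
```

The AOT utilities then take the maximum of this value across all contexts that use multi-context execution, as shown in the `utils.py` diff at the end of this page.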

3 changes: 2 additions & 1 deletion backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp
@@ -35,7 +35,8 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
.def("IsTensorDump", &PyQnnManager::IsTensorDump)
.def("AllocateTensor", &PyQnnManager::AllocateTensor)
.def("GetGraphInputs", &PyQnnManager::GetGraphInputs)
.def("GetGraphOutputs", &PyQnnManager::GetGraphOutputs);
.def("GetGraphOutputs", &PyQnnManager::GetGraphOutputs)
.def("GetSpillFillBufferSize", &PyQnnManager::GetSpillFillBufferSize);
}
} // namespace qnn
} // namespace executor
4 changes: 4 additions & 0 deletions backends/qualcomm/aot/python/PyQnnManagerAdaptor.h
@@ -177,6 +177,10 @@ class PyQnnManager {
return ret;
}

uint64_t GetSpillFillBufferSize() {
return qnn_manager_->GetSpillFillBufferSize();
}

private:
// Store the bytes object instead of a raw pointer so that this module will
// keep the bytes alive.
4 changes: 4 additions & 0 deletions backends/qualcomm/runtime/QnnManager.cpp
@@ -283,6 +283,10 @@ Error QnnManager::Init() {
qnn_loaded_backend_, logger_.get(), qnn_context_blob_, options_);
ET_CHECK_OR_RETURN_ERROR(
backend_params_ptr_ != nullptr, Internal, "Failed to load Qnn backend.")
ET_CHECK_OR_RETURN_ERROR(
backend_params_ptr_->qnn_backend_cache_ptr_->Configure() == Error::Ok,
Internal,
"Fail to configure Qnn backend cache");
ET_CHECK_OR_RETURN_ERROR(
backend_params_ptr_->qnn_backend_ptr_->Configure() == Error::Ok,
Internal,
6 changes: 6 additions & 0 deletions backends/qualcomm/runtime/QnnManager.h
@@ -70,6 +70,12 @@ class QnnManager {
// Pre-register custom memory handle from the SharedBuffer before execution
Error PreRegisterMem();

uint64_t GetSpillFillBufferSize() {
auto* htp_backend_cache_ptr = static_cast<HtpBackendCache*>(
backend_params_ptr_->qnn_backend_cache_ptr_.get());
return htp_backend_cache_ptr->GetSpillFillBufferSize();
}

std::vector<std::shared_ptr<TensorWrapper>> GetGraphInputs() {
return input_tensors_;
}
3 changes: 3 additions & 0 deletions backends/qualcomm/runtime/backends/CMakeLists.txt
@@ -77,7 +77,9 @@ target_sources(
target_sources(
qnn_backend_cache
PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCache.h
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackendCache.h
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCache.cpp
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackendCache.cpp
)

# qnn_graph
@@ -130,6 +132,7 @@ set(qnn_header_basenames
HTP/QnnHtpPerfInfrastructure.h
HTP/QnnHtpProfile.h
HTP/QnnHtpProperty.h
HTP/QnnHtpSystemContext.h
QnnInterface.h
QnnLog.h
QnnMem.h
23 changes: 14 additions & 9 deletions backends/qualcomm/runtime/backends/QnnBackendCache.cpp
@@ -28,13 +28,20 @@ Error QnnBackendCache::GetQnnGraphInfoFromBinary() {

if (error != QNN_SUCCESS) {
QNN_EXECUTORCH_LOG_WARN(
"Failed to interpret QNN Context "
"Failed to interpret QNN context "
"binary. Error code %d. "
"Try verifying binary with online-prepare format.",
QNN_GET_ERROR_CODE(error));
return Error::Internal;
}

Error status = RetrieveBackendBinaryInfo(binaryinfo);
if (status == Error::Internal) {
QNN_EXECUTORCH_LOG_ERROR(
"Failed to retrieve backend binary info from QNN context binary.");
return Error::Internal;
}

if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_1) {
num_graphs = binaryinfo->contextBinaryInfoV1.numGraphs;
graph = binaryinfo->contextBinaryInfoV1.graphs;
@@ -81,20 +88,18 @@ Error QnnBackendCache::GetQnnGraphInfoFromBinary() {
return Error::Ok;
}

QnnBackendCache::QnnBackendCache(
const QnnExecuTorchContextBinary& qnn_context_blob)
: qnn_context_blob_(qnn_context_blob) {
Error QnnBackendCache::Configure() {
if (qnn_context_blob_.buffer == nullptr) {
state_ = SERIALIZE;
QNN_EXECUTORCH_LOG_INFO("Caching: Caching is in SAVE MODE.");
return;
return Error::Ok;
}

if (qnn_sys_impl_.Load() != Error::Ok) {
QNN_EXECUTORCH_LOG_ERROR(
"Failed to Load QnnSystem "
"APIs. Caching mechanism is being disabled.");
return;
return Error::Internal;
}

Qnn_ErrorHandle_t error = QNN_SUCCESS;
@@ -109,7 +114,7 @@ QnnBackendCache::QnnBackendCache(
"Failed to create Qnn "
"SystemContext. Caching mechanism will be disabled. Error code %d",
QNN_GET_ERROR_CODE(error));
return;
return Error::Internal;
}

// DO DESERIALIZE
@@ -125,16 +130,16 @@ QnnBackendCache::QnnBackendCache(

if (qcir::VerifyGraphBuffer(verifier)) {
state_ = ONLINE_PREPARE;
return;
return Error::Ok;
}

QNN_EXECUTORCH_LOG_ERROR(
"Failed to parse QNN Graph Info. The cache "
"might be broken. Please consider to re-generate the "
"cache.");
InvalidateCache();
return;
}
return Error::Ok;
}

QnnBackendCache::~QnnBackendCache() {
14 changes: 11 additions & 3 deletions backends/qualcomm/runtime/backends/QnnBackendCache.h
@@ -23,9 +23,9 @@ class QnnBackendCache {
DESERIALIZE = 2,
ONLINE_PREPARE = 3,
};
explicit QnnBackendCache(const QnnExecuTorchContextBinary& qnn_context_blob);

~QnnBackendCache();
explicit QnnBackendCache(const QnnExecuTorchContextBinary& qnn_context_blob)
: qnn_context_blob_(qnn_context_blob) {}
virtual ~QnnBackendCache();
QnnBackendCache(const QnnBackendCache&) = delete;
QnnBackendCache(QnnBackendCache&&) = delete;
QnnBackendCache& operator=(const QnnBackendCache&) = delete;
@@ -51,6 +51,14 @@ class QnnBackendCache {
return graph_name_;
}

Error Configure();

protected:
virtual Error RetrieveBackendBinaryInfo(
__ET_UNUSED const QnnSystemContext_BinaryInfo_t* binaryinfo) {
return Error::Ok;
}

private:
Error GetQnnGraphInfoFromBinary();

5 changes: 4 additions & 1 deletion backends/qualcomm/runtime/backends/QnnBackendFactory.cpp
@@ -56,11 +56,14 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
backend_params->qnn_device_ptr_ = std::make_unique<HtpDevice>(
implementation, logger, options->soc_info(), htp_options);

backend_params->qnn_backend_cache_ptr_ =
std::make_unique<HtpBackendCache>(qnn_context_blob);

backend_params->qnn_context_ptr_ = std::make_unique<HtpContext>(
implementation,
backend_params->qnn_backend_ptr_.get(),
backend_params->qnn_device_ptr_.get(),
qnn_context_blob,
backend_params->qnn_backend_cache_ptr_.get(),
htp_options);

backend_params->qnn_graph_ptr_ = std::make_unique<HtpGraph>(
7 changes: 6 additions & 1 deletion backends/qualcomm/runtime/backends/QnnBackendFactory.h
@@ -8,6 +8,7 @@
#pragma once

#include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnBackendCache.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnContextCommon.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnDeviceCommon.h>
@@ -16,6 +17,7 @@
#include <executorch/backends/qualcomm/runtime/backends/QnnLogger.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnMemManager.h>
#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h>
#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.h>
#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h>
#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h>
#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h>
@@ -35,6 +37,7 @@ typedef struct BackendConfigParameters {
std::unique_ptr<QnnDevice> qnn_device_ptr_;
std::unique_ptr<QnnGraph> qnn_graph_ptr_;
std::unique_ptr<QnnMemManager> qnn_mem_manager_ptr_;
std::unique_ptr<QnnBackendCache> qnn_backend_cache_ptr_;

// Default ctor
BackendConfigParameters()
@@ -43,10 +46,12 @@
qnn_context_ptr_(nullptr),
qnn_device_ptr_(nullptr),
qnn_graph_ptr_(nullptr),
qnn_mem_manager_ptr_(nullptr) {}
qnn_mem_manager_ptr_(nullptr),
qnn_backend_cache_ptr_(nullptr) {}
// Default dtor
~BackendConfigParameters() {
qnn_graph_ptr_.reset();
qnn_backend_cache_ptr_.reset();
qnn_mem_manager_ptr_.reset();
qnn_context_ptr_.reset();
qnn_device_ptr_.reset();
9 changes: 4 additions & 5 deletions backends/qualcomm/runtime/backends/QnnContextCommon.h
@@ -22,13 +22,12 @@ class QnnContext {
const QnnImplementation& implementation,
QnnBackend* backend,
QnnDevice* device,
const QnnExecuTorchContextBinary& qnn_context_blob)
QnnBackendCache* cache)
: handle_(nullptr),
implementation_(implementation),
backend_(backend),
device_(device) {
cache_ = std::make_unique<QnnBackendCache>(qnn_context_blob);
}
device_(device),
cache_(cache) {}

virtual ~QnnContext();
Error Configure();
@@ -67,7 +66,7 @@ class QnnContext {
const QnnImplementation& implementation_;
QnnBackend* backend_;
QnnDevice* device_;
std::unique_ptr<QnnBackendCache> cache_;
QnnBackendCache* cache_;
std::vector<char> binary_buffer_;
};
} // namespace qnn
51 changes: 51 additions & 0 deletions backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.cpp
@@ -0,0 +1,51 @@
/*
* Copyright (c) Qualcomm Innovation Center, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.h>
#include "HTP/QnnHtpSystemContext.h"

namespace torch {
namespace executor {
namespace qnn {
Error HtpBackendCache::RetrieveBackendBinaryInfo(
const QnnSystemContext_BinaryInfo_t* binaryinfo) {
QnnHtpSystemContext_HwBlobInfo_t* htp_hwblobinfo = nullptr;

if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_1) {
htp_hwblobinfo = static_cast<QnnHtpSystemContext_HwBlobInfo_t*>(
binaryinfo->contextBinaryInfoV1.hwInfoBlob);
} else if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2) {
htp_hwblobinfo = static_cast<QnnHtpSystemContext_HwBlobInfo_t*>(
binaryinfo->contextBinaryInfoV2.hwInfoBlob);
} else {
QNN_EXECUTORCH_LOG_WARN(
"Unknown QNN BinaryInfo version %d.", binaryinfo->version);
return Error::Internal;
}

if (htp_hwblobinfo == nullptr) {
QNN_EXECUTORCH_LOG_WARN(
"Htp hardware blob information is not found in binary information.");
return Error::Ok;
}

if (htp_hwblobinfo->version ==
QNN_SYSTEM_CONTEXT_HTP_HW_INFO_BLOB_VERSION_V1) {
spill_fill_buf_ =
(*htp_hwblobinfo).contextBinaryHwInfoBlobV1_t.spillFillBufferSize;
} else {
QNN_EXECUTORCH_LOG_WARN(
"Unknown QNN Htp hw blob info version %d.", htp_hwblobinfo->version);
return Error::Internal;
}

return Error::Ok;
}

} // namespace qnn
} // namespace executor
} // namespace torch
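
The `spillFillBufferSize` pulled out of the HTP hardware-info blob is the scratch requirement the HTP compiler recorded for the graph, which is typically much smaller than the full context binary (the binary also carries weights and metadata); the previous `len(processed_bytes)` heuristic therefore tended to over-size the shared spill-fill region. A small hedged sketch comparing the two quantities, reusing the `query_spill_fill_size` helper assumed at the top of this page:

```python
# Compare the refined spill-fill size with the old heuristic for a
# lowered module `m` (assumed, as above, to come from the Qualcomm backend).
refined = query_spill_fill_size(m)   # from the HTP hw-info blob
heuristic = len(m.processed_bytes)   # previous approximation
print(f"spill-fill requirement: {refined} B, context binary: {heuristic} B")
```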
33 changes: 33 additions & 0 deletions backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.h
@@ -0,0 +1,33 @@
/*
* Copyright (c) Qualcomm Innovation Center, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <executorch/backends/qualcomm/runtime/backends/QnnBackendCache.h>

namespace torch {
namespace executor {
namespace qnn {
class HtpBackendCache : public QnnBackendCache {
public:
explicit HtpBackendCache(const QnnExecuTorchContextBinary& qnn_context_blob)
: QnnBackendCache(qnn_context_blob), spill_fill_buf_(0) {}
~HtpBackendCache() override = default;

uint64_t GetSpillFillBufferSize() {
return spill_fill_buf_;
}

protected:
Error RetrieveBackendBinaryInfo(
const QnnSystemContext_BinaryInfo_t* binaryinfo) override;

private:
uint64_t spill_fill_buf_;
};
} // namespace qnn
} // namespace executor
} // namespace torch
4 changes: 2 additions & 2 deletions backends/qualcomm/runtime/backends/htpbackend/HtpContext.h
@@ -22,9 +22,9 @@ class HtpContext : public QnnContext {
const QnnImplementation& implementation,
QnnBackend* backend,
QnnDevice* device,
const QnnExecuTorchContextBinary& qnn_context_blob,
QnnBackendCache* cache,
const QnnExecuTorchHtpBackendOptions* htp_options)
: QnnContext(implementation, backend, device, qnn_context_blob) {
: QnnContext(implementation, backend, device, cache) {
htp_context_custom_config_ =
std::make_unique<HtpContextCustomConfig>(this, htp_options);
}
@@ -19,7 +19,8 @@ HtpContextCustomConfig::CreateContextCustomConfig() {
QnnHtpContext_CustomConfig_t* p_custom_config = nullptr;
const HtpContext* htp_ctx = static_cast<const HtpContext*>(context_);

if (htp_options_->use_multi_contexts()) {
if (htp_options_->use_multi_contexts() &&
htp_options_->max_sf_buf_size() != 0) {
p_custom_config = AllocContextCustomConfig();
p_custom_config->option =
QNN_HTP_CONTEXT_CONFIG_OPTION_REGISTER_MULTI_CONTEXTS;
16 changes: 14 additions & 2 deletions backends/qualcomm/utils/utils.py
@@ -214,16 +214,28 @@ def process_exported_program(prog):
== QnnExecuTorchBackendType.kHtpBackend
and options.backend_options.htp_options.use_multi_contexts
):
max_sf_buf_size = max(max_sf_buf_size, len(m.processed_bytes))
qnn_mgr = PyQnnManagerAdaptor.QnnManager(
m.compile_specs[0].value, m.processed_bytes
)
assert qnn_mgr.Init().value == 0, "failed to load context binary"
max_sf_buf_size = max(
max_sf_buf_size, qnn_mgr.GetSpillFillBufferSize()
)
module_map[m] = options
qnn_mgr.Destroy()
return max_sf_buf_size, module_map

def process_lowered_module(module):
qnn_mgr = PyQnnManagerAdaptor.QnnManager(
module.compile_specs[0].value, module.processed_bytes
)
assert qnn_mgr.Init().value == 0, "failed to load context binary"
spill_fill_size = (
len(module.processed_bytes)
qnn_mgr.GetSpillFillBufferSize()
if custom_buffer_size is None
else custom_buffer_size
)
qnn_mgr.Destroy()
return spill_fill_size, {
module: convert_to_option(module.compile_specs[0].value)
}
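
Both helpers now derive the spill-fill size from the context binary itself, with `process_lowered_module` still letting an explicit `custom_buffer_size` take precedence. Downstream, the aggregated maximum has to end up in the HTP backend option that `HtpContextCustomConfig` reads as `max_sf_buf_size()` (see the guard added above); that write-back path is not part of this excerpt, so the sketch below is only an assumption about the wiring — the `update_spill_fill_size` name, the writable `max_sf_buf_size` field, and the `convert_to_flatbuffer` serializer are hypothetical here.

```python
# Hedged sketch: fold the per-module sizes into every module's compile
# spec so the runtime's HtpContextCustomConfig sees a non-zero
# max_sf_buf_size. Helper and field names are assumptions.
def update_spill_fill_size(lowered_modules):
    max_sf_buf_size, module_map = 0, {}
    for module in lowered_modules:
        size, mapping = process_lowered_module(module)  # helper shown above
        max_sf_buf_size = max(max_sf_buf_size, size)
        module_map.update(mapping)

    for module, options in module_map.items():
        # Mirror of the option read on the C++ side via
        # htp_options_->max_sf_buf_size(); treated as writable here.
        options.backend_options.htp_options.max_sf_buf_size = max_sf_buf_size
        # Re-serialize the updated options back into the compile spec
        # (serializer name is an assumption).
        module.compile_specs[0].value = convert_to_flatbuffer(options)
```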