Skip to content

Qualcomm AI Engine Direct - Implement SDK profiler and integrate with QNN profiler #2227

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions backends/qualcomm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ add_library(qnn_implementation STATIC)
add_library(qnn_sys_function_interface INTERFACE)
add_library(qnn_sys_implementation STATIC)
add_library(qnn_logger STATIC)
add_library(qnn_profiler STATIC)
add_library(qnn_device STATIC)
add_library(qnn_context STATIC)
add_library(qnn_backend_cache STATIC)
Expand Down Expand Up @@ -179,6 +180,10 @@ target_link_libraries(qnn_executorch_logging
PRIVATE
qnn_schema
)
target_link_libraries(qnn_profiler
PRIVATE
qnn_executorch_logging
)
target_link_libraries(qnn_logger
PRIVATE
qnn_implementation
Expand Down Expand Up @@ -213,6 +218,7 @@ target_link_libraries(qnn_graph
qnn_executorch_logging
qnn_implementation
qnn_context
qnn_profiler
)
target_link_libraries(qnn_factory
PUBLIC
Expand Down Expand Up @@ -249,6 +255,12 @@ target_link_libraries(utils
#
target_link_options_shared_lib(qnn_executorch_backend)

#
# add compile option
#
target_compile_options(executorch PUBLIC -DET_EVENT_TRACER_ENABLED)


#
# add sources
#
Expand Down
2 changes: 1 addition & 1 deletion backends/qualcomm/passes/i64_to_i32.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def _update_meta(self, node: torch.fx.node) -> None:
)
else:
if meta_val.dtype == torch.int64:
node.meta["val"] = meta_val.to(torch.int32)
node.meta["val"] = meta_val.to(torch.float)

def _cast_to_int32(self, graph_module: torch.fx.GraphModule):
for n in graph_module.graph.nodes:
Expand Down
6 changes: 4 additions & 2 deletions backends/qualcomm/qnn_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,5 +89,7 @@ def preprocess(
)
assert len(qnn_context_binary) != 0, "Failed to generate Qnn context binary."
qnn_manager.Destroy()

return PreprocessResult(bytes(qnn_context_binary))
# For now, debug_handle_map is not used by QNN ExecuTorch
return PreprocessResult(
processed_bytes=bytes(qnn_context_binary), debug_handle_map={}
)
7 changes: 6 additions & 1 deletion backends/qualcomm/runtime/QnnExecuTorchBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
}

Error QnnExecuTorchBackend::execute(
__ET_UNUSED BackendExecutionContext& context,
BackendExecutionContext& context,
DelegateHandle* handle,
EValue** args) const {
QnnManager* qnn_manager = static_cast<QnnManager*>(handle);
Expand Down Expand Up @@ -211,6 +211,11 @@ Error QnnExecuTorchBackend::execute(
Error::Ok,
Internal,
"Fail to execute graph");
ET_CHECK_OR_RETURN_ERROR(
qnn_manager->ProfileExecuteData(context.event_tracer()) == Error::Ok,
Internal,
"Fail to profile graph");

return Error::Ok;
}

Expand Down
17 changes: 17 additions & 0 deletions backends/qualcomm/runtime/QnnManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ QnnManager::QnnManager(
options_->tensor_dump_output_path()->c_str());
QNN_EXECUTORCH_LOG_INFO(
"log_level: %s", EnumNameQnnExecuTorchLogLevel(options_->log_level()));
QNN_EXECUTORCH_LOG_INFO(
"profile_level: %s",
EnumNameQnnExecuTorchProfileLevel(options_->profile_level()));
QNN_EXECUTORCH_LOG_INFO(
"the size of qnn context binary: %d",
qnn_executorch_context_binary.nbytes);
Expand Down Expand Up @@ -194,6 +197,20 @@ Error QnnManager::Execute(
return Error::Ok;
}

Error QnnManager::ProfileExecuteData(EventTracer* event_tracer) {
  // Profiling is a no-op unless it was requested via the compile options.
  if (options_->profile_level() == QnnExecuTorchProfileLevel::kProfileOff) {
    return Error::Ok;
  }
  // Forward the event tracer to the graph so per-op QNN profile events can
  // be logged as delegate profiling events.
  const Qnn_ErrorHandle_t status =
      backend_params_ptr_->qnn_graph_ptr_->ProfileExecuteData(event_tracer);
  if (status != QNN_SUCCESS) {
    QNN_EXECUTORCH_LOG_ERROR(
        " Failed to profile. Error %d", QNN_GET_ERROR_CODE(status));
    return Error::Internal;
  }
  return Error::Ok;
}

void QnnManager::Destroy() {
QNN_EXECUTORCH_LOG_INFO("Destroy Qnn backend parameters");
backend_params_ptr_.reset(new BackendConfigParameters());
Expand Down
2 changes: 2 additions & 0 deletions backends/qualcomm/runtime/QnnManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ class QnnManager {
const std::vector<Qnn_Tensor_t>& input_tensor_structs,
std::vector<Qnn_Tensor_t>& output_tensor_structs);

Error ProfileExecuteData(EventTracer* event_tracer);

void Destroy();

bool IsAvailable();
Expand Down
7 changes: 7 additions & 0 deletions backends/qualcomm/runtime/backends/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ target_sources(qnn_logger
${CMAKE_CURRENT_LIST_DIR}/QnnLogger.cpp
)

# qnn_profiler
target_sources(qnn_profiler
PRIVATE
${CMAKE_CURRENT_LIST_DIR}/QnnProfiler.h
${CMAKE_CURRENT_LIST_DIR}/QnnProfiler.cpp
)

# qnn_device
set(HOST_ARCHITECTURE
${CMAKE_CURRENT_LIST_DIR}/htpbackend/${CMAKE_SYSTEM_PROCESSOR}
Expand Down
4 changes: 4 additions & 0 deletions backends/qualcomm/runtime/backends/QnnBackendCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ class QnnBackend {
: handle_(nullptr), implementation_(implementation), logger_(logger) {}

virtual ~QnnBackend();
virtual bool IsProfileEventTypeParentOfNodeTime(
QnnProfile_EventType_t /*event_type*/) {
return false;
}

Error Configure();

Expand Down
2 changes: 2 additions & 0 deletions backends/qualcomm/runtime/backends/QnnBackendFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(

backend_params->qnn_graph_ptr_ = std::make_unique<HtpGraph>(
implementation,
backend_params->qnn_backend_ptr_.get(),
backend_params->qnn_context_ptr_.get(),
options->profile_level(),
options->graph_name()->str(),
options->soc_info(),
htp_options);
Expand Down
16 changes: 16 additions & 0 deletions backends/qualcomm/runtime/backends/QnnGraphCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,25 @@ Error QnnGraph::Configure() {
return Error::Internal;
}

// The profiler needs to be created after the backend is created.
profile_ =
std::make_unique<QnnProfile>(implementation_, backend_, profile_level_);
return Error::Ok;
}

Qnn_ErrorHandle_t QnnGraph::GraphExecute(
    const std::vector<Qnn_Tensor_t>& input_tensor_structs,
    std::vector<Qnn_Tensor_t>& output_tensor_structs) {
  // Execute the finalized graph. profile_ is only created in Configure();
  // guard against a pre-Configure call so we pass a null profile handle
  // (QNN's "no profiling") instead of dereferencing a null unique_ptr.
  Qnn_ProfileHandle_t profile_handle =
      profile_ ? profile_->GetHandle() : nullptr;
  return implementation_.GetQnnInterface().qnn_graph_execute(
      handle_,
      input_tensor_structs.data(),
      input_tensor_structs.size(),
      output_tensor_structs.data(),
      output_tensor_structs.size(),
      profile_handle,
      /*signalHandle=*/nullptr);
}

Error QnnGraph::EnsureTensorInQnnGraph(
const std::shared_ptr<TensorWrapper>& tensor_wrapper) {
const QnnInterface& qnn_interface = implementation_.GetQnnInterface();
Expand Down
23 changes: 12 additions & 11 deletions backends/qualcomm/runtime/backends/QnnGraphCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <executorch/backends/qualcomm/runtime/Logging.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnContextCommon.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnImplementation.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnProfiler.h>

#include <vector>

Expand All @@ -23,11 +24,15 @@ class QnnGraph {
public:
explicit QnnGraph(
const QnnImplementation& implementation,
QnnBackend* backend,
QnnContext* context,
const QnnExecuTorchProfileLevel& profile_level,
const std::string& graph_name)
: handle_(nullptr),
implementation_(implementation),
backend_(backend),
context_(context),
profile_level_(profile_level),
graph_name_(graph_name) {}

virtual ~QnnGraph(){};
Expand All @@ -36,16 +41,7 @@ class QnnGraph {

Qnn_ErrorHandle_t GraphExecute(
const std::vector<Qnn_Tensor_t>& input_tensor_structs,
std::vector<Qnn_Tensor_t>& output_tensor_structs) {
return implementation_.GetQnnInterface().qnn_graph_execute(
handle_,
input_tensor_structs.data(),
input_tensor_structs.size(),
output_tensor_structs.data(),
output_tensor_structs.size(),
/*profile=*/nullptr,
/*signalHandle=*/nullptr);
};
std::vector<Qnn_Tensor_t>& output_tensor_structs);

Qnn_ErrorHandle_t GraphAddNode(const Qnn_OpConfig_t& op_config) {
return implementation_.GetQnnInterface().qnn_graph_add_node(
Expand All @@ -58,7 +54,9 @@ class QnnGraph {
return implementation_.GetQnnInterface().qnn_graph_finalize(
handle_, nullptr /* profile_handle */, nullptr /* signal_handle */);
};

Qnn_ErrorHandle_t ProfileExecuteData(EventTracer* event_tracer) {
return profile_->ProfileData(event_tracer);
};
Qnn_GraphHandle_t GetHandle() {
return handle_;
}
Expand All @@ -71,8 +69,11 @@ class QnnGraph {
private:
Qnn_GraphHandle_t handle_;
const QnnImplementation& implementation_;
QnnBackend* backend_;
QnnContext* context_;
QnnExecuTorchProfileLevel profile_level_;
std::string graph_name_;
std::unique_ptr<QnnProfile> profile_;
};
} // namespace qnn
} // namespace executor
Expand Down
122 changes: 122 additions & 0 deletions backends/qualcomm/runtime/backends/QnnProfiler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
* Copyright (c) Qualcomm Innovation Center, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/backends/qualcomm/runtime/backends/QnnProfiler.h>
#include <iostream>

namespace torch {
namespace executor {
namespace qnn {

QnnProfile::QnnProfile(
    const QnnImplementation& implementation,
    QnnBackend* backend,
    const QnnExecuTorchProfileLevel& profile_level)
    : handle_(nullptr), implementation_(implementation), backend_(backend) {
  // Create a QNN profile handle only when profiling was requested. With
  // kProfileOff, handle_ stays nullptr, which downstream code treats as
  // "profiling disabled".
  if (profile_level != QnnExecuTorchProfileLevel::kProfileOff) {
    const QnnInterface& qnn_interface = implementation_.GetQnnInterface();
    // NOTE(review): the enum values of QnnExecuTorchProfileLevel are assumed
    // to map directly onto QNN's profile-level values via this cast — confirm
    // against the schema definition.
    Qnn_ErrorHandle_t error = qnn_interface.qnn_profile_create(
        backend_->GetHandle(), static_cast<int>(profile_level), &handle_);
    if (error != QNN_SUCCESS) {
      QNN_EXECUTORCH_LOG_WARN(
          "Failed to create profile_handle for backend "
          " %u, error=%d",
          qnn_interface.GetBackendId(),
          QNN_GET_ERROR_CODE(error));

      // Profiling is best-effort: swallow the error and reset handle_ so the
      // runtime behaves as if profiling were disabled.
      handle_ = nullptr;
    }
  }
}

Qnn_ErrorHandle_t QnnProfile::ProfileData(EventTracer* event_tracer) {
  // No profile handle means profiling is disabled (kProfileOff) or
  // qnn_profile_create failed and was deliberately ignored in the ctor.
  // Passing a null handle to qnn_profile_get_events would be an API misuse,
  // so report success with nothing to log instead.
  if (handle_ == nullptr) {
    return QNN_SUCCESS;
  }
  const QnnInterface& qnn_interface = implementation_.GetQnnInterface();
  const QnnProfile_EventId_t* events_ptr = nullptr;
  const QnnProfile_EventId_t* sub_events_ptr = nullptr;
  std::uint32_t num_events = 0;
  std::uint32_t num_sub_events = 0;
  // Top-level profile events collected during the last graph execution.
  Qnn_ErrorHandle_t error =
      qnn_interface.qnn_profile_get_events(handle_, &events_ptr, &num_events);
  if (error != QNN_SUCCESS) {
    QNN_EXECUTORCH_LOG_ERROR(
        "ProfileData failed to get events: %d", QNN_GET_ERROR_CODE(error));
    return error;
  }
  QnnProfile_EventData_t event_data;
  for (std::uint32_t i = 0; i < num_events; ++i) {
    error =
        qnn_interface.qnn_profile_get_event_data(events_ptr[i], &event_data);
    if (error != QNN_SUCCESS) {
      QNN_EXECUTORCH_LOG_ERROR(
          "ProfileData failed to get event data "
          "for event %d: %d",
          i,
          QNN_GET_ERROR_CODE(error));
      return error;
    }
    // Check an event's sub events only if it relates to graph execution time
    // (and its sub events are the individual op executions):
    if (backend_->IsProfileEventTypeParentOfNodeTime(event_data.type)) {
      error = qnn_interface.qnn_profile_get_sub_events(
          events_ptr[i], &sub_events_ptr, &num_sub_events);
      if (error != QNN_SUCCESS) {
        QNN_EXECUTORCH_LOG_ERROR(
            "ProfileData failed to get sub events "
            "for event %d: %d",
            i,
            QNN_GET_ERROR_CODE(error));
        return error;
      }
      QnnProfile_EventData_t sub_event_data;
      for (std::uint32_t j = 0; j < num_sub_events; ++j) {
        error = qnn_interface.qnn_profile_get_event_data(
            sub_events_ptr[j], &sub_event_data);
        if (error != QNN_SUCCESS) {
          QNN_EXECUTORCH_LOG_ERROR(
              "ProfileData failed to get sub "
              "event data for sub event %d of event %d: %d",
              j,
              i,
              QNN_GET_ERROR_CODE(error));
          return error;
        }
        // Log only per-node timing/cycle measurements into the event tracer;
        // other units (e.g. byte counts) are skipped.
        if (sub_event_data.type == QNN_PROFILE_EVENTTYPE_NODE &&
            (sub_event_data.unit == QNN_PROFILE_EVENTUNIT_MICROSEC ||
             sub_event_data.unit == QNN_PROFILE_EVENTUNIT_CYCLES)) {
          torch::executor::event_tracer_log_profiling_delegate(
              event_tracer,
              sub_event_data.identifier,
              /*delegate_debug_id=*/
              static_cast<torch::executor::DebugHandle>(-1),
              0,
              sub_event_data.value);
        }
      }
    }
  }
  return error;
}

QnnProfile::~QnnProfile() {
  // Release the QNN profile handle, if one was ever created.
  const QnnInterface& qnn_interface = implementation_.GetQnnInterface();
  if (handle_ == nullptr) {
    return;
  }
  const Qnn_ErrorHandle_t free_status = qnn_interface.qnn_profile_free(handle_);
  if (free_status != QNN_SUCCESS) {
    // Freeing failed; nothing more we can do in a destructor than log it.
    QNN_EXECUTORCH_LOG_ERROR(
        "Failed to free QNN profile_handle. Backend "
        "ID %u, error %d",
        qnn_interface.GetBackendId(),
        QNN_GET_ERROR_CODE(free_status));
  }
  handle_ = nullptr;
}
} // namespace qnn
} // namespace executor
} // namespace torch
Loading