Skip to content

Commit c7aae8c

Browse files
committed
Qualcomm AI Engine Direct - Implement SDK profiler and integrate it with the QNN profiler
Summary: - Implement the QNN profiler for the HTP backend. For now, only kProfileDetailed is supported, which profiles the performance of each operator in cycle units. Follow-up item: add more QNN profile items. - Integrate with the SDK profiler. - Add the etdump_path argument to qnn_executorch_runner to dump an etdump whose contents can be analyzed with the Inspector.
1 parent 57e192b commit c7aae8c

22 files changed

+326
-20
lines changed

backends/qualcomm/CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ add_library(qnn_implementation STATIC)
131131
add_library(qnn_sys_function_interface INTERFACE)
132132
add_library(qnn_sys_implementation STATIC)
133133
add_library(qnn_logger STATIC)
134+
add_library(qnn_profiler STATIC)
134135
add_library(qnn_device STATIC)
135136
add_library(qnn_context STATIC)
136137
add_library(qnn_backend_cache STATIC)
@@ -178,6 +179,10 @@ target_link_libraries(qnn_executorch_logging
178179
PRIVATE
179180
qnn_schema
180181
)
182+
target_link_libraries(qnn_profiler
183+
PRIVATE
184+
qnn_executorch_logging
185+
)
181186
target_link_libraries(qnn_logger
182187
PRIVATE
183188
qnn_implementation
@@ -212,6 +217,7 @@ target_link_libraries(qnn_graph
212217
qnn_executorch_logging
213218
qnn_implementation
214219
qnn_context
220+
qnn_profiler
215221
)
216222
target_link_libraries(qnn_factory
217223
PUBLIC
@@ -243,6 +249,12 @@ target_link_libraries(qnn_executorch_backend
243249
#
244250
target_link_options_shared_lib(qnn_executorch_backend)
245251

252+
#
253+
# add compile option
254+
#
255+
target_compile_options(executorch PUBLIC -DET_EVENT_TRACER_ENABLED)
256+
257+
246258
#
247259
# add sources
248260
#

backends/qualcomm/qnn_preprocess.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,5 +86,7 @@ def preprocess(
8686
)
8787
assert len(qnn_context_binary) != 0, "Failed to generate Qnn context binary."
8888
qnn_manager.Destroy()
89-
90-
return PreprocessResult(bytes(qnn_context_binary))
89+
# For now, debug_handle_map is not used by QNN ExecuTorch
90+
return PreprocessResult(
91+
processed_bytes=bytes(qnn_context_binary), debug_handle_map={}
92+
)

backends/qualcomm/runtime/QnnExecuTorchBackend.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
176176
}
177177

178178
Error QnnExecuTorchBackend::execute(
179-
__ET_UNUSED BackendExecutionContext& context,
179+
BackendExecutionContext& context,
180180
DelegateHandle* handle,
181181
EValue** args) const {
182182
QnnManager* qnn_manager = static_cast<QnnManager*>(handle);
@@ -208,6 +208,11 @@ Error QnnExecuTorchBackend::execute(
208208
Error::Ok,
209209
Internal,
210210
"Fail to execute graph");
211+
ET_CHECK_OR_RETURN_ERROR(
212+
qnn_manager->ProfileExecuteData(context.event_tracer()) == Error::Ok,
213+
Internal,
214+
"Fail to profile graph");
215+
211216
return Error::Ok;
212217
}
213218

backends/qualcomm/runtime/QnnManager.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ QnnManager::QnnManager(
2929
soc_info_(options->soc_info()),
3030
htp_options_(options->htp_options()),
3131
log_level_(options->log_level()),
32+
profile_level_(options->profile_level()),
3233
qnn_context_blob_(qnn_executorch_context_binary),
3334
qnn_loaded_backend_(library_path_),
3435
online_prepare_(options->online_prepare()) {
@@ -103,6 +104,7 @@ Error QnnManager::Init() {
103104
log_level_,
104105
qnn_context_blob_,
105106
backend_type_,
107+
profile_level_,
106108
graph_name_,
107109
soc_info_,
108110
htp_options_);
@@ -174,6 +176,20 @@ Error QnnManager::Execute(
174176
return Error::Ok;
175177
}
176178

179+
/**
 * Forwards the profiling data of the last graph execution to the event
 * tracer, unless profiling is switched off for this manager.
 *
 * @param event_tracer Tracer handed through to the graph's
 *     ProfileExecuteData().
 * @return Error::Ok when profiling is off or the graph reported QNN_SUCCESS;
 *     Error::Internal when the graph reported any other QNN status.
 */
Error QnnManager::ProfileExecuteData(EventTracer* event_tracer) {
  // With profiling disabled there is nothing to collect.
  if (profile_level_ == QnnExecuTorchProfileLevel::kProfileOff) {
    return Error::Ok;
  }

  const Qnn_ErrorHandle_t status =
      backend_params_ptr_->qnn_graph_ptr_->ProfileExecuteData(event_tracer);
  if (status == QNN_SUCCESS) {
    return Error::Ok;
  }

  QNN_EXECUTORCH_LOG_ERROR(
      " Failed to profile. Error %d", QNN_GET_ERROR_CODE(status));
  return Error::Internal;
}
192+
177193
void QnnManager::Destroy() {
178194
QNN_EXECUTORCH_LOG_INFO("Destroy Qnn backend parameters");
179195
backend_params_ptr_.reset(new BackendConfigParameters());

backends/qualcomm/runtime/QnnManager.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ class QnnManager {
3838
const std::vector<Qnn_Tensor_t>& input_tensor_structs,
3939
std::vector<Qnn_Tensor_t>& output_tensor_structs);
4040

41+
Error ProfileExecuteData(EventTracer* event_tracer);
42+
4143
void Destroy();
4244

4345
bool IsAvailable();
@@ -72,6 +74,7 @@ class QnnManager {
7274
const SocInfo* soc_info_;
7375
const QnnExecuTorchHtpBackendOptions* htp_options_;
7476
QnnExecuTorchLogLevel log_level_;
77+
QnnExecuTorchProfileLevel profile_level_;
7578
QnnExecuTorchContextBinary qnn_context_blob_;
7679
std::unique_ptr<BackendConfigParameters> backend_params_ptr_;
7780
QnnImplementation qnn_loaded_backend_;

backends/qualcomm/runtime/backends/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@ target_sources(qnn_logger
4141
${CMAKE_CURRENT_LIST_DIR}/QnnLogger.cpp
4242
)
4343

44+
# qnn_profiler
45+
target_sources(qnn_profiler
46+
PRIVATE
47+
${CMAKE_CURRENT_LIST_DIR}/QnnProfiler.h
48+
${CMAKE_CURRENT_LIST_DIR}/QnnProfiler.cpp
49+
)
50+
4451
# qnn_device
4552
set(HOST_ARCHITECTURE
4653
${CMAKE_CURRENT_LIST_DIR}/htpbackend/${CMAKE_SYSTEM_PROCESSOR}

backends/qualcomm/runtime/backends/QnnBackendCommon.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ class QnnBackend {
2727
: handle_(nullptr), implementation_(implementation), logger_(logger) {}
2828

2929
virtual ~QnnBackend();
30+
virtual bool IsProfileEventTypeParentOfNodeTime(
31+
QnnProfile_EventType_t /*event_type*/) {
32+
return false;
33+
}
3034

3135
Error Configure();
3236

backends/qualcomm/runtime/backends/QnnBackendFactory.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
1616
const QnnExecuTorchLogLevel& log_level,
1717
const QnnExecuTorchContextBinary& qnn_context_blob,
1818
const QnnExecuTorchBackendType& backend_type,
19+
const QnnExecuTorchProfileLevel& profile_level,
1920
const std::string& graph_name,
2021
const SocInfo* soc_info,
2122
const QnnExecuTorchHtpBackendOptions* htp_options) {
@@ -52,7 +53,9 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
5253

5354
backend_params->qnn_graph_ptr_ = std::make_unique<HtpGraph>(
5455
implementation,
56+
backend_params->qnn_backend_ptr_.get(),
5557
backend_params->qnn_context_ptr_.get(),
58+
profile_level,
5659
graph_name,
5760
soc_info,
5861
htp_options);

backends/qualcomm/runtime/backends/QnnBackendFactory.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ class QnnBackendFactory {
6060
const QnnExecuTorchLogLevel& log_level,
6161
const QnnExecuTorchContextBinary& qnn_context_blob,
6262
const QnnExecuTorchBackendType& backend_type,
63+
const QnnExecuTorchProfileLevel& profile_level,
6364
const std::string& graph_name,
6465
const SocInfo* soc_info,
6566
const QnnExecuTorchHtpBackendOptions* htp_options);

backends/qualcomm/runtime/backends/QnnGraphCommon.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,25 @@ Error QnnGraph::Configure() {
5151
return Error::Internal;
5252
}
5353

54+
// The profiler needs to be created after the backend is created.
55+
profile_ =
56+
std::make_unique<QnnProfile>(implementation_, backend_, profile_level_);
5457
return Error::Ok;
5558
}
5659

60+
/**
 * Runs the graph identified by handle_ through the QNN interface's
 * qnn_graph_execute entry point.
 *
 * @param input_tensor_structs Input tensors; passed as pointer + count.
 * @param output_tensor_structs Output tensors; passed as pointer + count.
 * @return The status returned by qnn_graph_execute.
 */
Qnn_ErrorHandle_t QnnGraph::GraphExecute(
    const std::vector<Qnn_Tensor_t>& input_tensor_structs,
    std::vector<Qnn_Tensor_t>& output_tensor_structs) {
  // profile_ is assigned in Configure(); if that has not happened, pass a
  // null profile handle (no profiling for this call) rather than
  // dereferencing a null pointer.
  return implementation_.GetQnnInterface().qnn_graph_execute(
      handle_,
      input_tensor_structs.data(),
      input_tensor_structs.size(),
      output_tensor_structs.data(),
      output_tensor_structs.size(),
      profile_ != nullptr ? profile_->GetHandle() : nullptr,
      /*signalHandle=*/nullptr);
}
72+
5773
Error QnnGraph::EnsureTensorInQnnGraph(
5874
const std::shared_ptr<TensorWrapper>& tensor_wrapper) {
5975
const QnnInterface& qnn_interface = implementation_.GetQnnInterface();

backends/qualcomm/runtime/backends/QnnGraphCommon.h

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <executorch/backends/qualcomm/runtime/Logging.h>
1212
#include <executorch/backends/qualcomm/runtime/backends/QnnContextCommon.h>
1313
#include <executorch/backends/qualcomm/runtime/backends/QnnImplementation.h>
14+
#include <executorch/backends/qualcomm/runtime/backends/QnnProfiler.h>
1415

1516
#include <vector>
1617

@@ -23,11 +24,15 @@ class QnnGraph {
2324
public:
2425
explicit QnnGraph(
2526
const QnnImplementation& implementation,
27+
QnnBackend* backend,
2628
QnnContext* context,
29+
const QnnExecuTorchProfileLevel& profile_level,
2730
const std::string& graph_name)
2831
: handle_(nullptr),
2932
implementation_(implementation),
33+
backend_(backend),
3034
context_(context),
35+
profile_level_(profile_level),
3136
graph_name_(graph_name) {}
3237

3338
virtual ~QnnGraph(){};
@@ -36,16 +41,7 @@ class QnnGraph {
3641

3742
Qnn_ErrorHandle_t GraphExecute(
3843
const std::vector<Qnn_Tensor_t>& input_tensor_structs,
39-
std::vector<Qnn_Tensor_t>& output_tensor_structs) {
40-
return implementation_.GetQnnInterface().qnn_graph_execute(
41-
handle_,
42-
input_tensor_structs.data(),
43-
input_tensor_structs.size(),
44-
output_tensor_structs.data(),
45-
output_tensor_structs.size(),
46-
/*profile=*/nullptr,
47-
/*signalHandle=*/nullptr);
48-
};
44+
std::vector<Qnn_Tensor_t>& output_tensor_structs);
4945

5046
Qnn_ErrorHandle_t GraphAddNode(const Qnn_OpConfig_t& op_config) {
5147
return implementation_.GetQnnInterface().qnn_graph_add_node(
@@ -58,7 +54,9 @@ class QnnGraph {
5854
return implementation_.GetQnnInterface().qnn_graph_finalize(
5955
handle_, nullptr /* profile_handle */, nullptr /* signal_handle */);
6056
};
61-
57+
Qnn_ErrorHandle_t ProfileExecuteData(EventTracer* event_tracer) {
58+
return profile_->ProfileData(event_tracer);
59+
};
6260
Qnn_GraphHandle_t GetHandle() {
6361
return handle_;
6462
}
@@ -71,8 +69,11 @@ class QnnGraph {
7169
private:
7270
Qnn_GraphHandle_t handle_;
7371
const QnnImplementation& implementation_;
72+
QnnBackend* backend_;
7473
QnnContext* context_;
74+
QnnExecuTorchProfileLevel profile_level_;
7575
std::string graph_name_;
76+
std::unique_ptr<QnnProfile> profile_;
7677
};
7778
} // namespace qnn
7879
} // namespace executor
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/*
2+
* Copyright (c) Qualcomm Innovation Center, Inc.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/backends/qualcomm/runtime/backends/QnnProfiler.h>
10+
#include <iostream>
11+
12+
namespace torch {
13+
namespace executor {
14+
namespace qnn {
15+
16+
/**
 * Creates a QNN profile handle on the given backend when profiling is
 * requested.
 *
 * handle_ stays nullptr when profile_level is kProfileOff or when
 * qnn_profile_create does not return QNN_SUCCESS; the latter case is only
 * logged as a warning.
 *
 * @param implementation Provides the QNN interface used to create the handle.
 * @param backend Backend whose handle the profile is created on.
 * @param profile_level Requested profiling level.
 */
QnnProfile::QnnProfile(
    const QnnImplementation& implementation,
    QnnBackend* backend,
    const QnnExecuTorchProfileLevel& profile_level)
    : handle_(nullptr), implementation_(implementation), backend_(backend) {
  if (profile_level == QnnExecuTorchProfileLevel::kProfileOff) {
    return;
  }

  const QnnInterface& qnn_interface = implementation_.GetQnnInterface();
  const Qnn_ErrorHandle_t status = qnn_interface.qnn_profile_create(
      backend_->GetHandle(), static_cast<int>(profile_level), &handle_);
  if (status == QNN_SUCCESS) {
    return;
  }

  QNN_EXECUTORCH_LOG_WARN(
      "Failed to create profile_handle for backend "
      " %u, error=%d",
      qnn_interface.GetBackendId(),
      QNN_GET_ERROR_CODE(status));

  // Creation failure is tolerated: keep going without a profile handle.
  handle_ = nullptr;
}
37+
38+
/**
 * Reads the events recorded on this profile handle and forwards per-node
 * measurements to the event tracer.
 *
 * For every top-level event whose type the backend reports as a parent of
 * node time, the sub events are walked; each sub event of type
 * QNN_PROFILE_EVENTTYPE_NODE measured in microseconds or cycles is logged via
 * event_tracer_log_profiling_delegate, using the sub event's identifier as
 * the name, 0 as the start value and the sub event's value as the end value.
 *
 * @param event_tracer Tracer that receives the delegate profiling entries.
 * @return QNN_SUCCESS when every event was read, or when there is no profile
 *     handle to read from; otherwise the first QNN error encountered.
 */
Qnn_ErrorHandle_t QnnProfile::ProfileData(EventTracer* event_tracer) {
  // The constructor leaves handle_ null when profiling is off or when handle
  // creation failed (which it deliberately tolerates). Querying events on a
  // null handle would turn that tolerated failure into an execute() error,
  // so report "nothing to collect" instead.
  if (handle_ == nullptr) {
    return QNN_SUCCESS;
  }

  const QnnInterface& qnn_interface = implementation_.GetQnnInterface();

  const QnnProfile_EventId_t* events_ptr = nullptr;
  std::uint32_t num_events = 0;
  Qnn_ErrorHandle_t error =
      qnn_interface.qnn_profile_get_events(handle_, &events_ptr, &num_events);
  if (error != QNN_SUCCESS) {
    QNN_EXECUTORCH_LOG_ERROR(
        "ProfileData failed to get events: %d", QNN_GET_ERROR_CODE(error));
    return error;
  }

  for (std::uint32_t i = 0; i < num_events; ++i) {
    QnnProfile_EventData_t event_data;
    error =
        qnn_interface.qnn_profile_get_event_data(events_ptr[i], &event_data);
    if (error != QNN_SUCCESS) {
      QNN_EXECUTORCH_LOG_ERROR(
          "ProfileData failed to get event data "
          "for event %u: %d",
          i,
          QNN_GET_ERROR_CODE(error));
      return error;
    }

    // Check an event's sub events only if it relates to graph execution time
    // (and its sub events are the individual op executions).
    if (!backend_->IsProfileEventTypeParentOfNodeTime(event_data.type)) {
      continue;
    }

    const QnnProfile_EventId_t* sub_events_ptr = nullptr;
    std::uint32_t num_sub_events = 0;
    error = qnn_interface.qnn_profile_get_sub_events(
        events_ptr[i], &sub_events_ptr, &num_sub_events);
    if (error != QNN_SUCCESS) {
      QNN_EXECUTORCH_LOG_ERROR(
          "ProfileData failed to get sub events "
          "for event %u: %d",
          i,
          QNN_GET_ERROR_CODE(error));
      return error;
    }

    for (std::uint32_t j = 0; j < num_sub_events; ++j) {
      QnnProfile_EventData_t sub_event_data;
      error = qnn_interface.qnn_profile_get_event_data(
          sub_events_ptr[j], &sub_event_data);
      if (error != QNN_SUCCESS) {
        QNN_EXECUTORCH_LOG_ERROR(
            "ProfileData failed to get sub "
            "event data for sub event %u of event %u: %d",
            j,
            i,
            QNN_GET_ERROR_CODE(error));
        return error;
      }

      // Only per-node entries with a time-like unit are forwarded.
      const bool is_node_event =
          sub_event_data.type == QNN_PROFILE_EVENTTYPE_NODE;
      const bool has_supported_unit =
          sub_event_data.unit == QNN_PROFILE_EVENTUNIT_MICROSEC ||
          sub_event_data.unit == QNN_PROFILE_EVENTUNIT_CYCLES;
      if (is_node_event && has_supported_unit) {
        torch::executor::event_tracer_log_profiling_delegate(
            event_tracer,
            sub_event_data.identifier,
            /*delegate_debug_id=*/
            static_cast<torch::executor::DebugHandle>(-1),
            0,
            sub_event_data.value);
      }
    }
  }
  return QNN_SUCCESS;
}
105+
106+
/**
 * Frees the QNN profile handle if one was created. A failure reported by
 * qnn_profile_free is logged; handle_ is cleared either way.
 */
QnnProfile::~QnnProfile() {
  const QnnInterface& qnn_interface = implementation_.GetQnnInterface();
  if (handle_ == nullptr) {
    return;
  }

  const Qnn_ErrorHandle_t status = qnn_interface.qnn_profile_free(handle_);
  if (status != QNN_SUCCESS) {
    QNN_EXECUTORCH_LOG_ERROR(
        "Failed to free QNN profile_handle. Backend "
        "ID %u, error %d",
        qnn_interface.GetBackendId(),
        QNN_GET_ERROR_CODE(status));
  }
  handle_ = nullptr;
}
120+
} // namespace qnn
121+
} // namespace executor
122+
} // namespace torch

0 commit comments

Comments
 (0)