Skip to content

Commit 3c83553

Browse files
authored
Qualcomm AI Engine Direct - Support Hybrid Mode for Llama3.2
Differential Revision: D66895016. Pull Request resolved: #7175.
1 parent 9008017 commit 3c83553

31 files changed

+1463
-583
lines changed

backends/qualcomm/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,6 @@ include_directories(
7676

7777
set(_qnn_schema__srcs
7878
backends/qualcomm/serialization/qc_compiler_spec.fbs
79-
backends/qualcomm/serialization/qc_binary_info.fbs
8079
)
8180
set(_qnn_schema__include_dir "${CMAKE_BINARY_DIR}/schema/include")
8281
# Paths to headers generated from the .fbs files.
@@ -116,6 +115,7 @@ add_library(qcir_utils STATIC)
116115
add_library(qnn_backend STATIC)
117116
add_library(qnn_backend_cache STATIC)
118117
add_library(qnn_context STATIC)
118+
add_library(qnn_custom_protocol STATIC)
119119
add_library(qnn_device STATIC)
120120
add_library(qnn_executorch_backend SHARED)
121121
add_library(qnn_executorch_header INTERFACE)
@@ -155,6 +155,7 @@ target_link_libraries(qnn_executorch_logging PRIVATE qnn_schema)
155155
target_link_libraries(qnn_profiler PRIVATE qnn_executorch_logging)
156156
target_link_libraries(qnn_logger PRIVATE qnn_implementation ${android_log})
157157
target_link_libraries(qnn_backend PRIVATE qnn_implementation qnn_logger)
158+
target_link_libraries(qnn_custom_protocol PRIVATE qcir_utils)
158159
target_link_libraries(
159160
qnn_device PRIVATE qnn_executorch_logging qnn_implementation qnn_logger
160161
)
@@ -177,7 +178,7 @@ target_link_libraries(
177178
qnn_factory
178179
PUBLIC qnn_header
179180
PRIVATE qnn_schema qnn_backend qnn_device qnn_context qnn_graph
180-
qnn_mem_manager
181+
qnn_mem_manager qnn_custom_protocol
181182
)
182183
target_link_libraries(
183184
qnn_manager PRIVATE qnn_factory wrappers qnn_schema utils shared_buffer

backends/qualcomm/aot/ir/qcir.fbs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,17 +80,18 @@ table Tensor {
8080
type: TensorType;
8181
dtype: DataType;
8282
qparam: QuantizeParam;
83-
data: [ubyte];
83+
size: uint;
84+
offset: ulong;
8485
}
8586

8687
table Operator {
8788
name: string;
8889
package_name: string;
8990
type_name: string;
9091
// keep only tensor indexes
91-
inputs: [int];
92-
outputs: [int];
93-
params: [int];
92+
inputs: [uint];
93+
outputs: [uint];
94+
params: [uint];
9495
}
9596

9697
table Graph {

backends/qualcomm/aot/ir/qcir_utils.cpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -235,11 +235,8 @@ Qnn_QuantizeParams_t ToQuantizeParam(const tensor_type& tensor) {
235235

236236
flatbuffers::Offset<qcir::Tensor> ToTensor(
237237
const Qnn_Tensor_t& tensor,
238+
const uint64_t data_offset,
238239
flatbuffers::FlatBufferBuilder* builder) {
239-
std::vector<uint8_t> buffer(
240-
static_cast<uint8_t*>(QNN_VER_PTR(tensor)->clientBuf.data),
241-
static_cast<uint8_t*>(QNN_VER_PTR(tensor)->clientBuf.data) +
242-
QNN_VER_PTR(tensor)->clientBuf.dataSize);
243240
std::vector<uint32_t> shape(
244241
QNN_VER_PTR(tensor)->dimensions,
245242
QNN_VER_PTR(tensor)->dimensions + QNN_VER_PTR(tensor)->rank);
@@ -251,10 +248,11 @@ flatbuffers::Offset<qcir::Tensor> ToTensor(
251248
ToTensorType(QNN_VER_PTR(tensor)->type),
252249
ToDataType(QNN_VER_PTR(tensor)->dataType),
253250
ToQuantizeParam(tensor, builder),
254-
&buffer);
251+
QNN_VER_PTR(tensor)->clientBuf.dataSize,
252+
data_offset);
255253
}
256254

257-
Qnn_Tensor_t ToTensor(const tensor_type& tensor) {
255+
Qnn_Tensor_t ToTensor(const tensor_type& tensor, const uint8_t* data_ptr) {
258256
auto is_io_tensor = [](Qnn_TensorType_t type) {
259257
return type < QNN_TENSOR_TYPE_STATIC;
260258
};
@@ -266,10 +264,10 @@ Qnn_Tensor_t ToTensor(const tensor_type& tensor) {
266264
QNN_VER_PTR(t)->quantizeParams = ToQuantizeParam(tensor);
267265
QNN_VER_PTR(t)->rank = tensor->shape()->size();
268266
QNN_VER_PTR(t)->dimensions = const_cast<uint32_t*>(tensor->shape()->data());
269-
QNN_VER_PTR(t)->clientBuf.dataSize = tensor->data()->size();
267+
QNN_VER_PTR(t)->clientBuf.dataSize = tensor->size();
270268
QNN_VER_PTR(t)->clientBuf.data = is_io_tensor(QNN_VER_PTR(t)->type)
271269
? nullptr
272-
: static_cast<void*>(const_cast<uint8_t*>(tensor->data()->Data()));
270+
: static_cast<void*>(const_cast<uint8_t*>(data_ptr));
273271
return t;
274272
}
275273

backends/qualcomm/aot/ir/qcir_utils.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ Qnn_QuantizeParams_t ToQuantizeParam(const tensor_type& tensor);
3232

3333
flatbuffers::Offset<qcir::Tensor> ToTensor(
3434
const Qnn_Tensor_t& tensor,
35+
const uint64_t data_offset,
3536
flatbuffers::FlatBufferBuilder* builder);
36-
Qnn_Tensor_t ToTensor(const tensor_type& tensor);
37+
Qnn_Tensor_t ToTensor(const tensor_type& tensor, const uint8_t* data_ptr);
3738

3839
} // namespace qnn
3940
} // namespace backends

0 commit comments

Comments
 (0)