Skip to content

Commit 2e204b9

Browse files
authored
Qualcomm AI Engine Direct - dynamic shape support (#7780)
summary: - dynamic shape related change for QC backend - breakage fix - test cases
1 parent 0a533b0 commit 2e204b9

File tree

18 files changed

+541
-191
lines changed

18 files changed

+541
-191
lines changed

backends/qualcomm/aot/ir/qcir.fbs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ table QuantizeParam {
7777
table Tensor {
7878
name: string;
7979
shape: [uint];
80+
dynamic_dims: [ubyte];
8081
type: TensorType;
8182
dtype: DataType;
8283
qparam: QuantizeParam;

backends/qualcomm/aot/ir/qcir_utils.cpp

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ flatbuffers::Offset<qcir::QuantizeParam> ToQuantizeParam(
124124

125125
int32_t axis = 0;
126126
uint32_t bitwidth = 0;
127-
auto param = QNN_VER_PTR(tensor)->quantizeParams;
127+
auto param = QNN_TENSOR_VER_PTR(tensor)->quantizeParams;
128128
auto quant_type = type_map.at(param.quantizationEncoding);
129129
std::vector<qcir::ScaleOffset> data;
130130
std::vector<float> scales;
@@ -238,17 +238,23 @@ flatbuffers::Offset<qcir::Tensor> ToTensor(
238238
const uint64_t data_offset,
239239
flatbuffers::FlatBufferBuilder* builder) {
240240
std::vector<uint32_t> shape(
241-
QNN_VER_PTR(tensor)->dimensions,
242-
QNN_VER_PTR(tensor)->dimensions + QNN_VER_PTR(tensor)->rank);
241+
QNN_TENSOR_VER_PTR(tensor)->dimensions,
242+
QNN_TENSOR_VER_PTR(tensor)->dimensions +
243+
QNN_TENSOR_VER_PTR(tensor)->rank);
244+
std::vector<uint8_t> dynamic_dims(
245+
QNN_TENSOR_VER_PTR(tensor)->isDynamicDimensions,
246+
QNN_TENSOR_VER_PTR(tensor)->isDynamicDimensions +
247+
QNN_TENSOR_VER_PTR(tensor)->rank);
243248

244249
return qcir::CreateTensorDirect(
245250
*builder,
246-
QNN_VER_PTR(tensor)->name,
251+
QNN_TENSOR_VER_PTR(tensor)->name,
247252
&shape,
248-
ToTensorType(QNN_VER_PTR(tensor)->type),
249-
ToDataType(QNN_VER_PTR(tensor)->dataType),
253+
&dynamic_dims,
254+
ToTensorType(QNN_TENSOR_VER_PTR(tensor)->type),
255+
ToDataType(QNN_TENSOR_VER_PTR(tensor)->dataType),
250256
ToQuantizeParam(tensor, builder),
251-
QNN_VER_PTR(tensor)->clientBuf.dataSize,
257+
QNN_TENSOR_VER_PTR(tensor)->clientBuf.dataSize,
252258
data_offset);
253259
}
254260

@@ -257,15 +263,19 @@ Qnn_Tensor_t ToTensor(const tensor_type& tensor, const uint8_t* data_ptr) {
257263
return type < QNN_TENSOR_TYPE_STATIC;
258264
};
259265

260-
Qnn_Tensor_t t = QNN_TENSOR_INIT;
261-
QNN_VER_PTR(t)->name = tensor->name()->c_str();
262-
QNN_VER_PTR(t)->type = ToTensorType(tensor->type());
263-
QNN_VER_PTR(t)->dataType = ToDataType(tensor->dtype());
264-
QNN_VER_PTR(t)->quantizeParams = ToQuantizeParam(tensor);
265-
QNN_VER_PTR(t)->rank = tensor->shape()->size();
266-
QNN_VER_PTR(t)->dimensions = const_cast<uint32_t*>(tensor->shape()->data());
267-
QNN_VER_PTR(t)->clientBuf.dataSize = tensor->size();
268-
QNN_VER_PTR(t)->clientBuf.data = is_io_tensor(QNN_VER_PTR(t)->type)
266+
Qnn_Tensor_t t({.version = QNN_TENSOR_VERSION_2, .v2 = QNN_TENSOR_V2_INIT});
267+
QNN_TENSOR_VER_PTR(t)->name = tensor->name()->c_str();
268+
QNN_TENSOR_VER_PTR(t)->type = ToTensorType(tensor->type());
269+
QNN_TENSOR_VER_PTR(t)->dataType = ToDataType(tensor->dtype());
270+
QNN_TENSOR_VER_PTR(t)->quantizeParams = ToQuantizeParam(tensor);
271+
QNN_TENSOR_VER_PTR(t)->rank = tensor->shape()->size();
272+
QNN_TENSOR_VER_PTR(t)->dimensions =
273+
const_cast<uint32_t*>(tensor->shape()->data());
274+
QNN_TENSOR_VER_PTR(t)->isDynamicDimensions =
275+
const_cast<uint8_t*>(tensor->dynamic_dims()->data());
276+
QNN_TENSOR_VER_PTR(t)->clientBuf.dataSize = tensor->size();
277+
QNN_TENSOR_VER_PTR(t)->clientBuf.data =
278+
is_io_tensor(QNN_TENSOR_VER_PTR(t)->type)
269279
? nullptr
270280
: static_cast<void*>(const_cast<uint8_t*>(data_ptr));
271281
return t;

backends/qualcomm/aot/python/PyQnnManagerAdaptor.h

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -215,13 +215,13 @@ class PyQnnManager {
215215
Qnn_Tensor_t qnn_tensor = wrapper->CloneTensorStruct();
216216
fb_tensors.emplace_back(
217217
ToTensor(qnn_tensor, offsets.back(), &builder_));
218-
uint8_t* data_ptr =
219-
static_cast<uint8_t*>(QNN_VER_PTR(qnn_tensor)->clientBuf.data);
218+
uint8_t* data_ptr = static_cast<uint8_t*>(
219+
QNN_TENSOR_VER_PTR(qnn_tensor)->clientBuf.data);
220220
if (data_ptr != nullptr) {
221221
tensor_data.insert(
222222
tensor_data.end(),
223223
data_ptr,
224-
data_ptr + QNN_VER_PTR(qnn_tensor)->clientBuf.dataSize);
224+
data_ptr + QNN_TENSOR_VER_PTR(qnn_tensor)->clientBuf.dataSize);
225225
}
226226
}
227227
};
@@ -251,22 +251,23 @@ class PyQnnManager {
251251
return py::array_t<char>(0);
252252
}
253253
Qnn_Param_t p = param->GetQnnParam();
254-
Qnn_Tensor_t t = QNN_TENSOR_INIT;
255-
QNN_VER_PTR(t)->name = p.name;
256-
QNN_VER_PTR(t)->dataType = p.scalarParam.dataType;
257-
QNN_VER_PTR(t)->clientBuf.data =
254+
Qnn_Tensor_t t(
255+
{.version = QNN_TENSOR_VERSION_2, .v2 = QNN_TENSOR_V2_INIT});
256+
QNN_TENSOR_VER_PTR(t)->name = p.name;
257+
QNN_TENSOR_VER_PTR(t)->dataType = p.scalarParam.dataType;
258+
QNN_TENSOR_VER_PTR(t)->clientBuf.data =
258259
static_cast<void*>(&p.scalarParam.uint8Value);
259-
QNN_VER_PTR(t)->clientBuf.dataSize =
260-
GetDataTypeSize(QNN_VER_PTR(t)->dataType);
260+
QNN_TENSOR_VER_PTR(t)->clientBuf.dataSize =
261+
GetDataTypeSize(QNN_TENSOR_VER_PTR(t)->dataType);
261262

262263
// collect tensor data
263264
offsets.push_back(tensor_data.size());
264265
const uint8_t* data_ptr =
265-
static_cast<uint8_t*>(QNN_VER_PTR(t)->clientBuf.data);
266+
static_cast<uint8_t*>(QNN_TENSOR_VER_PTR(t)->clientBuf.data);
266267
tensor_data.insert(
267268
tensor_data.end(),
268269
data_ptr,
269-
data_ptr + QNN_VER_PTR(t)->clientBuf.dataSize);
270+
data_ptr + QNN_TENSOR_VER_PTR(t)->clientBuf.dataSize);
270271
params.push_back(fb_tensors.size());
271272
fb_tensors.emplace_back(ToTensor(t, offsets.back(), &builder_));
272273
}
@@ -275,9 +276,9 @@ class PyQnnManager {
275276
Qnn_OpConfig_t op_config = op_wrapper->GetOpConfig();
276277
fb_ops.emplace_back(qcir::CreateOperatorDirect(
277278
builder_,
278-
QNN_VER_PTR(op_config)->name,
279-
QNN_VER_PTR(op_config)->packageName,
280-
QNN_VER_PTR(op_config)->typeName,
279+
QNN_OP_VER_PTR(op_config)->name,
280+
QNN_OP_VER_PTR(op_config)->packageName,
281+
QNN_OP_VER_PTR(op_config)->typeName,
281282
&inputs,
282283
&outputs,
283284
&params));

backends/qualcomm/aot/python/PyQnnWrapperAdaptor.cpp

Lines changed: 37 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ std::shared_ptr<TensorWrapper> CreateTensorWrapper(
105105
py::dict& quant_info,
106106
std::uint32_t rank,
107107
const std::vector<uint32_t>& dims,
108+
const std::vector<uint8_t>& dynamic_dims,
108109
py::array& data,
109110
bool copy_data) {
110111
std::unique_ptr<QuantizeParamsWrapper> quantize_param_wrapper =
@@ -117,6 +118,7 @@ std::shared_ptr<TensorWrapper> CreateTensorWrapper(
117118
std::move(quantize_param_wrapper),
118119
rank,
119120
dims.data(),
121+
dynamic_dims.data(),
120122
0,
121123
data.size() == 0 ? nullptr : data.data(),
122124
copy_data);
@@ -228,22 +230,27 @@ PYBIND11_MODULE(PyQnnWrapperAdaptor, m) {
228230
py::list input_tensors_list;
229231
py::list output_tensors_list;
230232
result["version"] = op_config.version;
231-
result["name"] = op_config.v1.name;
232-
result["packageName"] = op_config.v1.packageName;
233-
result["typeName"] = op_config.v1.typeName;
234-
result["numOfParams"] = op_config.v1.numOfParams;
235-
for (size_t i = 0; i < op_config.v1.numOfParams; ++i) {
236-
params_list.append(op_config.v1.params[i]);
233+
result["name"] = QNN_OP_VER_PTR(op_config)->name;
234+
result["packageName"] = QNN_OP_VER_PTR(op_config)->packageName;
235+
result["typeName"] = QNN_OP_VER_PTR(op_config)->typeName;
236+
result["numOfParams"] = QNN_OP_VER_PTR(op_config)->numOfParams;
237+
for (size_t i = 0; i < QNN_OP_VER_PTR(op_config)->numOfParams;
238+
++i) {
239+
params_list.append(QNN_OP_VER_PTR(op_config)->params[i]);
237240
}
238241
result["params"] = params_list;
239-
result["numOfInputs"] = op_config.v1.numOfInputs;
240-
for (size_t i = 0; i < op_config.v1.numOfInputs; ++i) {
241-
input_tensors_list.append(op_config.v1.inputTensors[i]);
242+
result["numOfInputs"] = QNN_OP_VER_PTR(op_config)->numOfInputs;
243+
for (size_t i = 0; i < QNN_OP_VER_PTR(op_config)->numOfInputs;
244+
++i) {
245+
input_tensors_list.append(
246+
QNN_OP_VER_PTR(op_config)->inputTensors[i]);
242247
}
243248
result["inputTensors"] = input_tensors_list;
244-
result["numOfOutputs"] = op_config.v1.numOfOutputs;
245-
for (size_t i = 0; i < op_config.v1.numOfOutputs; ++i) {
246-
output_tensors_list.append(op_config.v1.outputTensors[i]);
249+
result["numOfOutputs"] = QNN_OP_VER_PTR(op_config)->numOfOutputs;
250+
for (size_t i = 0; i < QNN_OP_VER_PTR(op_config)->numOfOutputs;
251+
++i) {
252+
output_tensors_list.append(
253+
QNN_OP_VER_PTR(op_config)->outputTensors[i]);
247254
}
248255
result["outputTensors"] = output_tensors_list;
249256
return result;
@@ -259,6 +266,7 @@ PYBIND11_MODULE(PyQnnWrapperAdaptor, m) {
259266
py::dict&,
260267
std::uint32_t,
261268
const std::vector<uint32_t>&,
269+
const std::vector<uint8_t>&,
262270
py::array&,
263271
bool>(&CreateTensorWrapper)));
264272

@@ -376,14 +384,6 @@ PYBIND11_MODULE(PyQnnWrapperAdaptor, m) {
376384

377385
py::class_<Qnn_Tensor_t>(m, "Qnn_Tensor_t")
378386
.def_readonly("version", &Qnn_Tensor_t::version)
379-
.def_property_readonly(
380-
"v1",
381-
[](Qnn_Tensor_t& t) -> Qnn_TensorV1_t& {
382-
if (t.version == QNN_TENSOR_VERSION_1) {
383-
return t.v1;
384-
}
385-
throw std::runtime_error("Tensor version is not V1.");
386-
})
387387
.def_property_readonly("v2", [](Qnn_Tensor_t& t) -> Qnn_TensorV2_t& {
388388
if (t.version == QNN_TENSOR_VERSION_2) {
389389
return t.v2;
@@ -399,21 +399,28 @@ PYBIND11_MODULE(PyQnnWrapperAdaptor, m) {
399399
Qnn_TensorVersion_t::QNN_TENSOR_VERSION_UNDEFINED)
400400
.export_values();
401401

402-
py::class_<Qnn_TensorV1_t>(m, "QnnTensorV1")
403-
.def_readonly("id", &Qnn_TensorV1_t::id)
404-
.def_readonly("name", &Qnn_TensorV1_t::name)
405-
.def_readonly("type", &Qnn_TensorV1_t::type)
406-
.def_readonly("dataFormat", &Qnn_TensorV1_t::dataFormat)
407-
.def_readonly("dataType", &Qnn_TensorV1_t::dataType)
408-
.def_readonly("quantizeParams", &Qnn_TensorV1_t::quantizeParams)
409-
.def_readonly("rank", &Qnn_TensorV1_t::rank)
402+
py::class_<Qnn_TensorV2_t>(m, "Qnn_TensorV2_t")
403+
.def_readonly("id", &Qnn_TensorV2_t::id)
404+
.def_readonly("name", &Qnn_TensorV2_t::name)
405+
.def_readonly("type", &Qnn_TensorV2_t::type)
406+
.def_readonly("dataFormat", &Qnn_TensorV2_t::dataFormat)
407+
.def_readonly("dataType", &Qnn_TensorV2_t::dataType)
408+
.def_readonly("quantizeParams", &Qnn_TensorV2_t::quantizeParams)
409+
.def_readonly("rank", &Qnn_TensorV2_t::rank)
410410
// change dimensions pointer to vector(begin to rank)
411411
.def_property_readonly(
412412
"dimensions",
413-
[](const Qnn_TensorV1_t& t) {
413+
[](const Qnn_TensorV2_t& t) {
414414
return std::vector<uint32_t>(t.dimensions, t.dimensions + t.rank);
415415
})
416-
.def_readonly("memType", &Qnn_TensorV1_t::memType);
416+
.def_property_readonly(
417+
"isDynamicDimensions",
418+
[](const Qnn_TensorV2_t& t) {
419+
return t.dimensions == nullptr
420+
? std::vector<uint32_t>()
421+
: std::vector<uint32_t>(t.dimensions, t.dimensions + t.rank);
422+
})
423+
.def_readonly("memType", &Qnn_TensorV2_t::memType);
417424

418425
py::enum_<Qnn_TensorMemType_t>(m, "Qnn_TensorMemType_t")
419426
.value(

backends/qualcomm/aot/wrappers/OpWrapper.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,19 @@ Qnn_OpConfig_t OpWrapper::GetOpConfig() {
2727

2828
Qnn_OpConfig_t ret = QNN_OPCONFIG_INIT;
2929
ret.version = QNN_OPCONFIG_VERSION_1;
30-
Qnn_OpConfigV1_t& op_config = ret.v1;
30+
auto op_config = QNN_OP_VER_PTR(ret);
3131

32-
op_config.name = name_.c_str();
33-
op_config.packageName = package_name_.c_str();
34-
op_config.typeName = op_type_.c_str();
35-
op_config.numOfParams = static_cast<std::uint32_t>(param_types_.size());
36-
op_config.params = param_types_.data();
37-
op_config.numOfInputs =
32+
op_config->name = name_.c_str();
33+
op_config->packageName = package_name_.c_str();
34+
op_config->typeName = op_type_.c_str();
35+
op_config->numOfParams = static_cast<std::uint32_t>(param_types_.size());
36+
op_config->params = param_types_.data();
37+
op_config->numOfInputs =
3838
static_cast<std::uint32_t>(input_tensor_structs_.size());
39-
op_config.inputTensors = input_tensor_structs_.data();
40-
op_config.numOfOutputs =
39+
op_config->inputTensors = input_tensor_structs_.data();
40+
op_config->numOfOutputs =
4141
static_cast<std::uint32_t>(output_tensor_structs_.size());
42-
op_config.outputTensors = output_tensor_structs_.data();
42+
op_config->outputTensors = output_tensor_structs_.data();
4343

4444
return ret;
4545
}

backends/qualcomm/aot/wrappers/OpWrapper.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,14 @@ class OpWrapper final {
6868
std::unique_ptr<QuantizeParamsWrapper> quantize_param_wrapper =
6969
std::make_unique<UndefinedQuantizeParamsWrapper>();
7070
constexpr std::uint32_t kBytes = 0;
71+
std::vector<uint8_t> dynamic_dims(rank, 0);
7172
std::shared_ptr<TensorWrapper> tensor_wrapper = CreateTensorWrapper(
7273
QNN_TENSOR_TYPE_STATIC,
7374
data_type,
7475
std::move(quantize_param_wrapper),
7576
rank,
7677
dims,
78+
dynamic_dims.data(),
7779
kBytes,
7880
data,
7981
copy_data);

0 commit comments

Comments
 (0)