Skip to content

Qualcomm AI Engine Direct - dynamic shape support #7780

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backends/qualcomm/aot/ir/qcir.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ table QuantizeParam {
table Tensor {
name: string;
shape: [uint];
dynamic_dims: [ubyte];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a heads up, this year we'd need to figure out the story for BC/FC, and afterwards we'd need to make the flatbuffers BC-compatible (e.g., adding new fields at the end of a table instead of inserting them in the middle)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, will pay attention to this. I think we also have a plan to phase out qcir by replacing online_prepare with QNN DLC, which will reduce the maintenance effort. And it could be fully deprecated once the multi-method RFC comes out.

type: TensorType;
dtype: DataType;
qparam: QuantizeParam;
Expand Down
42 changes: 26 additions & 16 deletions backends/qualcomm/aot/ir/qcir_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ flatbuffers::Offset<qcir::QuantizeParam> ToQuantizeParam(

int32_t axis = 0;
uint32_t bitwidth = 0;
auto param = QNN_VER_PTR(tensor)->quantizeParams;
auto param = QNN_TENSOR_VER_PTR(tensor)->quantizeParams;
auto quant_type = type_map.at(param.quantizationEncoding);
std::vector<qcir::ScaleOffset> data;
std::vector<float> scales;
Expand Down Expand Up @@ -238,17 +238,23 @@ flatbuffers::Offset<qcir::Tensor> ToTensor(
const uint64_t data_offset,
flatbuffers::FlatBufferBuilder* builder) {
std::vector<uint32_t> shape(
QNN_VER_PTR(tensor)->dimensions,
QNN_VER_PTR(tensor)->dimensions + QNN_VER_PTR(tensor)->rank);
QNN_TENSOR_VER_PTR(tensor)->dimensions,
QNN_TENSOR_VER_PTR(tensor)->dimensions +
QNN_TENSOR_VER_PTR(tensor)->rank);
std::vector<uint8_t> dynamic_dims(
QNN_TENSOR_VER_PTR(tensor)->isDynamicDimensions,
QNN_TENSOR_VER_PTR(tensor)->isDynamicDimensions +
QNN_TENSOR_VER_PTR(tensor)->rank);

return qcir::CreateTensorDirect(
*builder,
QNN_VER_PTR(tensor)->name,
QNN_TENSOR_VER_PTR(tensor)->name,
&shape,
ToTensorType(QNN_VER_PTR(tensor)->type),
ToDataType(QNN_VER_PTR(tensor)->dataType),
&dynamic_dims,
ToTensorType(QNN_TENSOR_VER_PTR(tensor)->type),
ToDataType(QNN_TENSOR_VER_PTR(tensor)->dataType),
ToQuantizeParam(tensor, builder),
QNN_VER_PTR(tensor)->clientBuf.dataSize,
QNN_TENSOR_VER_PTR(tensor)->clientBuf.dataSize,
data_offset);
}

Expand All @@ -257,15 +263,19 @@ Qnn_Tensor_t ToTensor(const tensor_type& tensor, const uint8_t* data_ptr) {
return type < QNN_TENSOR_TYPE_STATIC;
};

Qnn_Tensor_t t = QNN_TENSOR_INIT;
QNN_VER_PTR(t)->name = tensor->name()->c_str();
QNN_VER_PTR(t)->type = ToTensorType(tensor->type());
QNN_VER_PTR(t)->dataType = ToDataType(tensor->dtype());
QNN_VER_PTR(t)->quantizeParams = ToQuantizeParam(tensor);
QNN_VER_PTR(t)->rank = tensor->shape()->size();
QNN_VER_PTR(t)->dimensions = const_cast<uint32_t*>(tensor->shape()->data());
QNN_VER_PTR(t)->clientBuf.dataSize = tensor->size();
QNN_VER_PTR(t)->clientBuf.data = is_io_tensor(QNN_VER_PTR(t)->type)
Qnn_Tensor_t t({.version = QNN_TENSOR_VERSION_2, .v2 = QNN_TENSOR_V2_INIT});
QNN_TENSOR_VER_PTR(t)->name = tensor->name()->c_str();
QNN_TENSOR_VER_PTR(t)->type = ToTensorType(tensor->type());
QNN_TENSOR_VER_PTR(t)->dataType = ToDataType(tensor->dtype());
QNN_TENSOR_VER_PTR(t)->quantizeParams = ToQuantizeParam(tensor);
QNN_TENSOR_VER_PTR(t)->rank = tensor->shape()->size();
QNN_TENSOR_VER_PTR(t)->dimensions =
const_cast<uint32_t*>(tensor->shape()->data());
QNN_TENSOR_VER_PTR(t)->isDynamicDimensions =
const_cast<uint8_t*>(tensor->dynamic_dims()->data());
QNN_TENSOR_VER_PTR(t)->clientBuf.dataSize = tensor->size();
QNN_TENSOR_VER_PTR(t)->clientBuf.data =
is_io_tensor(QNN_TENSOR_VER_PTR(t)->type)
? nullptr
: static_cast<void*>(const_cast<uint8_t*>(data_ptr));
return t;
Expand Down
29 changes: 15 additions & 14 deletions backends/qualcomm/aot/python/PyQnnManagerAdaptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,13 +215,13 @@ class PyQnnManager {
Qnn_Tensor_t qnn_tensor = wrapper->CloneTensorStruct();
fb_tensors.emplace_back(
ToTensor(qnn_tensor, offsets.back(), &builder_));
uint8_t* data_ptr =
static_cast<uint8_t*>(QNN_VER_PTR(qnn_tensor)->clientBuf.data);
uint8_t* data_ptr = static_cast<uint8_t*>(
QNN_TENSOR_VER_PTR(qnn_tensor)->clientBuf.data);
if (data_ptr != nullptr) {
tensor_data.insert(
tensor_data.end(),
data_ptr,
data_ptr + QNN_VER_PTR(qnn_tensor)->clientBuf.dataSize);
data_ptr + QNN_TENSOR_VER_PTR(qnn_tensor)->clientBuf.dataSize);
}
}
};
Expand Down Expand Up @@ -251,22 +251,23 @@ class PyQnnManager {
return py::array_t<char>(0);
}
Qnn_Param_t p = param->GetQnnParam();
Qnn_Tensor_t t = QNN_TENSOR_INIT;
QNN_VER_PTR(t)->name = p.name;
QNN_VER_PTR(t)->dataType = p.scalarParam.dataType;
QNN_VER_PTR(t)->clientBuf.data =
Qnn_Tensor_t t(
{.version = QNN_TENSOR_VERSION_2, .v2 = QNN_TENSOR_V2_INIT});
QNN_TENSOR_VER_PTR(t)->name = p.name;
QNN_TENSOR_VER_PTR(t)->dataType = p.scalarParam.dataType;
QNN_TENSOR_VER_PTR(t)->clientBuf.data =
static_cast<void*>(&p.scalarParam.uint8Value);
QNN_VER_PTR(t)->clientBuf.dataSize =
GetDataTypeSize(QNN_VER_PTR(t)->dataType);
QNN_TENSOR_VER_PTR(t)->clientBuf.dataSize =
GetDataTypeSize(QNN_TENSOR_VER_PTR(t)->dataType);

// collect tensor data
offsets.push_back(tensor_data.size());
const uint8_t* data_ptr =
static_cast<uint8_t*>(QNN_VER_PTR(t)->clientBuf.data);
static_cast<uint8_t*>(QNN_TENSOR_VER_PTR(t)->clientBuf.data);
tensor_data.insert(
tensor_data.end(),
data_ptr,
data_ptr + QNN_VER_PTR(t)->clientBuf.dataSize);
data_ptr + QNN_TENSOR_VER_PTR(t)->clientBuf.dataSize);
params.push_back(fb_tensors.size());
fb_tensors.emplace_back(ToTensor(t, offsets.back(), &builder_));
}
Expand All @@ -275,9 +276,9 @@ class PyQnnManager {
Qnn_OpConfig_t op_config = op_wrapper->GetOpConfig();
fb_ops.emplace_back(qcir::CreateOperatorDirect(
builder_,
QNN_VER_PTR(op_config)->name,
QNN_VER_PTR(op_config)->packageName,
QNN_VER_PTR(op_config)->typeName,
QNN_OP_VER_PTR(op_config)->name,
QNN_OP_VER_PTR(op_config)->packageName,
QNN_OP_VER_PTR(op_config)->typeName,
&inputs,
&outputs,
&params));
Expand Down
67 changes: 37 additions & 30 deletions backends/qualcomm/aot/python/PyQnnWrapperAdaptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ std::shared_ptr<TensorWrapper> CreateTensorWrapper(
py::dict& quant_info,
std::uint32_t rank,
const std::vector<uint32_t>& dims,
const std::vector<uint8_t>& dynamic_dims,
py::array& data,
bool copy_data) {
std::unique_ptr<QuantizeParamsWrapper> quantize_param_wrapper =
Expand All @@ -117,6 +118,7 @@ std::shared_ptr<TensorWrapper> CreateTensorWrapper(
std::move(quantize_param_wrapper),
rank,
dims.data(),
dynamic_dims.data(),
0,
data.size() == 0 ? nullptr : data.data(),
copy_data);
Expand Down Expand Up @@ -228,22 +230,27 @@ PYBIND11_MODULE(PyQnnWrapperAdaptor, m) {
py::list input_tensors_list;
py::list output_tensors_list;
result["version"] = op_config.version;
result["name"] = op_config.v1.name;
result["packageName"] = op_config.v1.packageName;
result["typeName"] = op_config.v1.typeName;
result["numOfParams"] = op_config.v1.numOfParams;
for (size_t i = 0; i < op_config.v1.numOfParams; ++i) {
params_list.append(op_config.v1.params[i]);
result["name"] = QNN_OP_VER_PTR(op_config)->name;
result["packageName"] = QNN_OP_VER_PTR(op_config)->packageName;
result["typeName"] = QNN_OP_VER_PTR(op_config)->typeName;
result["numOfParams"] = QNN_OP_VER_PTR(op_config)->numOfParams;
for (size_t i = 0; i < QNN_OP_VER_PTR(op_config)->numOfParams;
++i) {
params_list.append(QNN_OP_VER_PTR(op_config)->params[i]);
}
result["params"] = params_list;
result["numOfInputs"] = op_config.v1.numOfInputs;
for (size_t i = 0; i < op_config.v1.numOfInputs; ++i) {
input_tensors_list.append(op_config.v1.inputTensors[i]);
result["numOfInputs"] = QNN_OP_VER_PTR(op_config)->numOfInputs;
for (size_t i = 0; i < QNN_OP_VER_PTR(op_config)->numOfInputs;
++i) {
input_tensors_list.append(
QNN_OP_VER_PTR(op_config)->inputTensors[i]);
}
result["inputTensors"] = input_tensors_list;
result["numOfOutputs"] = op_config.v1.numOfOutputs;
for (size_t i = 0; i < op_config.v1.numOfOutputs; ++i) {
output_tensors_list.append(op_config.v1.outputTensors[i]);
result["numOfOutputs"] = QNN_OP_VER_PTR(op_config)->numOfOutputs;
for (size_t i = 0; i < QNN_OP_VER_PTR(op_config)->numOfOutputs;
++i) {
output_tensors_list.append(
QNN_OP_VER_PTR(op_config)->outputTensors[i]);
}
result["outputTensors"] = output_tensors_list;
return result;
Expand All @@ -259,6 +266,7 @@ PYBIND11_MODULE(PyQnnWrapperAdaptor, m) {
py::dict&,
std::uint32_t,
const std::vector<uint32_t>&,
const std::vector<uint8_t>&,
py::array&,
bool>(&CreateTensorWrapper)));

Expand Down Expand Up @@ -376,14 +384,6 @@ PYBIND11_MODULE(PyQnnWrapperAdaptor, m) {

py::class_<Qnn_Tensor_t>(m, "Qnn_Tensor_t")
.def_readonly("version", &Qnn_Tensor_t::version)
.def_property_readonly(
"v1",
[](Qnn_Tensor_t& t) -> Qnn_TensorV1_t& {
if (t.version == QNN_TENSOR_VERSION_1) {
return t.v1;
}
throw std::runtime_error("Tensor version is not V1.");
})
.def_property_readonly("v2", [](Qnn_Tensor_t& t) -> Qnn_TensorV2_t& {
if (t.version == QNN_TENSOR_VERSION_2) {
return t.v2;
Expand All @@ -399,21 +399,28 @@ PYBIND11_MODULE(PyQnnWrapperAdaptor, m) {
Qnn_TensorVersion_t::QNN_TENSOR_VERSION_UNDEFINED)
.export_values();

py::class_<Qnn_TensorV1_t>(m, "QnnTensorV1")
.def_readonly("id", &Qnn_TensorV1_t::id)
.def_readonly("name", &Qnn_TensorV1_t::name)
.def_readonly("type", &Qnn_TensorV1_t::type)
.def_readonly("dataFormat", &Qnn_TensorV1_t::dataFormat)
.def_readonly("dataType", &Qnn_TensorV1_t::dataType)
.def_readonly("quantizeParams", &Qnn_TensorV1_t::quantizeParams)
.def_readonly("rank", &Qnn_TensorV1_t::rank)
py::class_<Qnn_TensorV2_t>(m, "Qnn_TensorV2_t")
.def_readonly("id", &Qnn_TensorV2_t::id)
.def_readonly("name", &Qnn_TensorV2_t::name)
.def_readonly("type", &Qnn_TensorV2_t::type)
.def_readonly("dataFormat", &Qnn_TensorV2_t::dataFormat)
.def_readonly("dataType", &Qnn_TensorV2_t::dataType)
.def_readonly("quantizeParams", &Qnn_TensorV2_t::quantizeParams)
.def_readonly("rank", &Qnn_TensorV2_t::rank)
// change dimensions pointer to vector(begin to rank)
.def_property_readonly(
"dimensions",
[](const Qnn_TensorV1_t& t) {
[](const Qnn_TensorV2_t& t) {
return std::vector<uint32_t>(t.dimensions, t.dimensions + t.rank);
})
.def_readonly("memType", &Qnn_TensorV1_t::memType);
.def_property_readonly(
"isDynamicDimensions",
[](const Qnn_TensorV2_t& t) {
return t.dimensions == nullptr
? std::vector<uint32_t>()
: std::vector<uint32_t>(t.dimensions, t.dimensions + t.rank);
})
.def_readonly("memType", &Qnn_TensorV2_t::memType);

py::enum_<Qnn_TensorMemType_t>(m, "Qnn_TensorMemType_t")
.value(
Expand Down
20 changes: 10 additions & 10 deletions backends/qualcomm/aot/wrappers/OpWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,19 @@ Qnn_OpConfig_t OpWrapper::GetOpConfig() {

Qnn_OpConfig_t ret = QNN_OPCONFIG_INIT;
ret.version = QNN_OPCONFIG_VERSION_1;
Qnn_OpConfigV1_t& op_config = ret.v1;
auto op_config = QNN_OP_VER_PTR(ret);

op_config.name = name_.c_str();
op_config.packageName = package_name_.c_str();
op_config.typeName = op_type_.c_str();
op_config.numOfParams = static_cast<std::uint32_t>(param_types_.size());
op_config.params = param_types_.data();
op_config.numOfInputs =
op_config->name = name_.c_str();
op_config->packageName = package_name_.c_str();
op_config->typeName = op_type_.c_str();
op_config->numOfParams = static_cast<std::uint32_t>(param_types_.size());
op_config->params = param_types_.data();
op_config->numOfInputs =
static_cast<std::uint32_t>(input_tensor_structs_.size());
op_config.inputTensors = input_tensor_structs_.data();
op_config.numOfOutputs =
op_config->inputTensors = input_tensor_structs_.data();
op_config->numOfOutputs =
static_cast<std::uint32_t>(output_tensor_structs_.size());
op_config.outputTensors = output_tensor_structs_.data();
op_config->outputTensors = output_tensor_structs_.data();

return ret;
}
Expand Down
2 changes: 2 additions & 0 deletions backends/qualcomm/aot/wrappers/OpWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,14 @@ class OpWrapper final {
std::unique_ptr<QuantizeParamsWrapper> quantize_param_wrapper =
std::make_unique<UndefinedQuantizeParamsWrapper>();
constexpr std::uint32_t kBytes = 0;
std::vector<uint8_t> dynamic_dims(rank, 0);
std::shared_ptr<TensorWrapper> tensor_wrapper = CreateTensorWrapper(
QNN_TENSOR_TYPE_STATIC,
data_type,
std::move(quantize_param_wrapper),
rank,
dims,
dynamic_dims.data(),
kBytes,
data,
copy_data);
Expand Down
Loading
Loading