[cortex-m] Add scalar C++ op for quantize_per_tensor #10266

Merged · 5 commits · Apr 23, 2025
9 changes: 5 additions & 4 deletions backends/cortex_m/ops/TARGETS
@@ -5,8 +5,7 @@
# LICENSE file in the root directory of this source tree.

load("@fbcode_macros//build_defs:python_library.bzl", "python_library")
load("@fbcode_macros//build_defs:export_files.bzl", "export_file")
load("@fbsource//xplat/executorch/codegen:codegen.bzl", "executorch_generated_lib")
load("targets.bzl", "define_common_targets")

oncall("executorch")

@@ -17,5 +16,7 @@ python_library(
],
deps = [
"fbcode//caffe2:torch",
]
)
],
)

define_common_targets()
154 changes: 154 additions & 0 deletions backends/cortex_m/ops/op_quantize_per_tensor.cpp
@@ -0,0 +1,154 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/runtime/kernel/kernel_includes.h>
#include <algorithm>
#include <cinttypes>
#include <cmath>
#include <limits>

// Check for Helium/MVE support
#if defined(__ARM_FEATURE_MVE) && (__ARM_FEATURE_MVE & 1)
#include <arm_mve.h>
#define HAS_HELIUM_SIMD 1
#endif

namespace cortex_m {
namespace native {

using Tensor = executorch::aten::Tensor;
using ScalarType = executorch::aten::ScalarType;
using KernelRuntimeContext = torch::executor::KernelRuntimeContext;

namespace {

/**
* Asserts that the parameters are valid for float to int8 quantization.
*/
void check_quantize_args(
const Tensor& input,
int64_t quant_min,
int64_t quant_max,
ScalarType dtype,
Tensor& out) {
// Ensure input is float type
ET_CHECK_MSG(
input.scalar_type() == ScalarType::Float,
"input.scalar_type() %" PRId8 " is not float type",
static_cast<int8_t>(input.scalar_type()));

// Check output dtype is int8 (Char)
ET_CHECK_MSG(
out.scalar_type() == ScalarType::Char,
"out.scalar_type() %" PRId8 " is not int8 (Char)",
static_cast<int8_t>(out.scalar_type()));

// Check dtype is int8 (Char)
ET_CHECK_MSG(
dtype == ScalarType::Char,
"dtype %" PRId8 " is not int8 (Char)",
static_cast<int8_t>(dtype));

// Validate quant_min and quant_max for int8
int32_t quant_min_lower_bound = std::numeric_limits<int8_t>::min();
int32_t quant_max_upper_bound = std::numeric_limits<int8_t>::max();

ET_CHECK_MSG(
quant_min >= quant_min_lower_bound,
"quant_min out of bound for int8, expected quant_min_lower_bound: %" PRId32
" actual quant_min: %" PRId64,
quant_min_lower_bound,
quant_min);

ET_CHECK_MSG(
quant_max <= quant_max_upper_bound,
"quant_max out of bound for int8, expected quant_max_upper_bound: %" PRId32
" actual quant_max: %" PRId64,
quant_max_upper_bound,
quant_max);
}

/**
* Scalar implementation of quantization for a single value.
*/
template <typename T, typename K>
T quantize_val(
float inv_scale,
int32_t zero_point,
K value,
int64_t quant_min,
int64_t quant_max) {
int32_t qvalue =
zero_point + static_cast<int32_t>(std::nearbyint(inv_scale * value));
qvalue = std::max<int32_t>(qvalue, static_cast<int32_t>(quant_min));
qvalue = std::min<int32_t>(qvalue, static_cast<int32_t>(quant_max));
return static_cast<T>(qvalue);
}

} // namespace

Tensor& quantize_per_tensor_out(
KernelRuntimeContext& context,
const Tensor& input,
double scale,
int64_t zero_point,
int64_t quant_min,
int64_t quant_max,
ScalarType dtype,
Tensor& out) {
// Ignore context for now
(void)context;

// Resize output tensor to match input dimensions
torch::executor::Error err = resize_tensor(out, input.sizes());
ET_CHECK_MSG(
err == torch::executor::Error::Ok,
"Failed to resize out Tensor in quantize_per_tensor_out");

// Validate input parameters
check_quantize_args(input, quant_min, quant_max, dtype, out);

// Pre-compute inverse scale for better performance
float inv_scale = 1.0f / static_cast<float>(scale);
int32_t zp = static_cast<int32_t>(zero_point);
int32_t qmin = static_cast<int32_t>(quant_min);
int32_t qmax = static_cast<int32_t>(quant_max);

// Get pointers to input and output data
const float* input_data = input.const_data_ptr<float>();
int8_t* out_data = out.mutable_data_ptr<int8_t>();
const size_t numel = input.numel();

#if defined(HAS_HELIUM_SIMD)
// Helium MVE implementation for float32 to int8 quantization
#error "Implement MVE version!"
#else
// Scalar implementation for float32 to int8 quantization
for (size_t i = 0; i < numel; i++) {
out_data[i] =
quantize_val<int8_t, float>(inv_scale, zp, input_data[i], qmin, qmax);
}
#endif

return out;
}

Tensor& quantize_per_tensor_out(
const Tensor& input,
double scale,
int64_t zero_point,
int64_t quant_min,
int64_t quant_max,
ScalarType dtype,
Tensor& out) {
KernelRuntimeContext context;
return quantize_per_tensor_out(
context, input, scale, zero_point, quant_min, quant_max, dtype, out);
}

} // namespace native
} // namespace cortex_m
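
Note: the scalar path above reduces to q = clamp(zero_point + round(value / scale), quant_min, quant_max). The standalone sketch below (illustrative only, not part of this PR; the helper name quantize_scalar is hypothetical) reproduces that math and can be used to sanity-check expected outputs such as the 116 case in the unit test further down.

/*
 * Standalone sketch of the same round-then-clamp math as quantize_val()
 * above. Illustrative only; not part of the PR.
 */
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int8_t quantize_scalar(
    float value,
    float scale,
    int32_t zero_point,
    int32_t quant_min,
    int32_t quant_max) {
  const float inv_scale = 1.0f / scale;
  int32_t q =
      zero_point + static_cast<int32_t>(std::nearbyint(inv_scale * value));
  q = std::min(std::max(q, quant_min), quant_max);
  return static_cast<int8_t>(q);
}

int main() {
  // Mirrors the unit test below: quantizing 4.0 with scale 0.5, zero point 108.
  std::printf("%d\n", quantize_scalar(4.0f, 0.5f, 108, 0, 127)); // prints 116
  return 0;
}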
11 changes: 11 additions & 0 deletions backends/cortex_m/ops/operators.yaml
@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

- func: cortex_m::quantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: cortex_m::quantize_per_tensor_out
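
As a reading aid (not part of the PR): the schema types line up with the C++ kernel named by kernel_name following the conventions that both signatures in this diff suggest: float maps to double, int maps to int64_t, and Tensor(a!) out becomes the mutable Tensor& that is also returned; the leading KernelRuntimeContext& is supplied by the generated registration (an assumption based on the two overloads in op_quantize_per_tensor.cpp).

// Annotated correspondence (types as declared in op_quantize_per_tensor.cpp):
Tensor& quantize_per_tensor_out(
    KernelRuntimeContext& context, // provided by the runtime / codegen
    const Tensor& input,           // Tensor input
    double scale,                  // float      -> double
    int64_t zero_point,            // int        -> int64_t
    int64_t quant_min,             // int        -> int64_t
    int64_t quant_max,             // int        -> int64_t
    ScalarType dtype,              // ScalarType -> ScalarType
    Tensor& out);                  // Tensor(a!) out, returned as Tensor(a!)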
68 changes: 68 additions & 0 deletions backends/cortex_m/ops/targets.bzl
@@ -0,0 +1,68 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

load("@fbsource//tools/build_defs:platform_defs.bzl", "CXX")
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
load("@fbsource//xplat/executorch/codegen:codegen.bzl", "et_operator_library", "executorch_generated_lib")
load("@fbcode_macros//build_defs:export_files.bzl", "export_file")

def define_operator_target(name: str):
runtime.cxx_library(
name = "op_{}".format(name),
srcs = [
"op_{}.cpp".format(name),
],
platforms = CXX,
deps = [
"//executorch/runtime/kernel:kernel_includes"
],
link_whole = True,
)

OPERATORS = [
"quantize_per_tensor",
]

def define_common_targets():
"""Defines targets that should be shared between fbcode and xplat.

The directory containing this targets.bzl file should also contain both
TARGETS and BUCK files that call this function.
"""
for op in OPERATORS:
define_operator_target(op)

all_op_targets = [":op_{}".format(op) for op in OPERATORS]

runtime.cxx_library(
name = "cortex_m_operators",
srcs = [],
visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
exported_deps = all_op_targets,
)

export_file(name = "operators.yaml")

et_operator_library(
name = "ops_lib",
_is_external_target = True,
ops_schema_yaml_target = ":operators.yaml",
)

executorch_generated_lib(
name = "cortex_m_generated_lib",
deps = [
":ops_lib",
":cortex_m_operators",
],
functions_yaml_target = ":operators.yaml",
platforms = CXX,
visibility = ["PUBLIC"],
define_static_targets = True,
)
7 changes: 6 additions & 1 deletion backends/cortex_m/test/TARGETS
@@ -5,6 +5,9 @@
# LICENSE file in the root directory of this source tree.

load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")
load("targets.bzl", "define_common_targets")

oncall("executorch")

python_unittest(
name = "test_replace_quant_nodes",
@@ -15,4 +18,6 @@ python_unittest(
"//executorch/backends/cortex_m/passes:replace_quant_nodes_pass",
"//executorch/backends/cortex_m/ops:ops",
],
)
)

define_common_targets()
55 changes: 55 additions & 0 deletions backends/cortex_m/test/op_quantize_per_tensor_test.cpp
@@ -0,0 +1,55 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/backends/cortex_m/ops/NativeFunctions.h> // Declares the operator
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include <gtest/gtest.h>

using executorch::aten::ScalarType;
using executorch::aten::Tensor;
using executorch::runtime::KernelRuntimeContext;
using torch::executor::testing::TensorFactory;

// Test op
using cortex_m::native::quantize_per_tensor_out;

void test_dtype() {
TensorFactory<ScalarType::Float> tf;

Tensor input = tf.full({3, 5}, 4);
double scale = 0.5;

int64_t zero_point = 108;
int64_t quant_min = 0;
int64_t quant_max = 127;

TensorFactory<ScalarType::Char> tfo;
Tensor out = tfo.zeros({3, 5});
// 4 / 0.5 + 108 = 116
Tensor expected = tfo.full({3, 5}, 116);

KernelRuntimeContext ctx;
quantize_per_tensor_out(
ctx,
input,
scale,
zero_point,
quant_min,
quant_max,
ScalarType::Char,
out);

EXPECT_TENSOR_EQ(out, expected);
}

TEST(OpQuantizeOutTest, AllDtypesSupported) {
test_dtype();
}
37 changes: 37 additions & 0 deletions backends/cortex_m/test/targets.bzl
@@ -0,0 +1,37 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

OPERATORS = [
"quantize_per_tensor",
]

def define_operator_test_target(op):
runtime.cxx_test(
name = "op_{}_test".format(op),
srcs = [
"op_{}_test.cpp".format(op),
],
deps = [
"//executorch/runtime/kernel:kernel_includes",
"//executorch/kernels/test:test_util",
"//executorch/backends/cortex_m/ops:op_{}".format(op),
"//executorch/backends/cortex_m/ops:cortex_m_generated_lib",
"//executorch/backends/cortex_m/ops:cortex_m_generated_lib_headers",
]
)

def define_common_targets():
"""Defines targets that should be shared between fbcode and xplat.

The directory containing this targets.bzl file should also contain both
TARGETS and BUCK files that call this function.
"""
for op in OPERATORS:
define_operator_test_target(op)

