Skip to content

Cadence fusiong3 operators m2 #7490

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
url = https://github.com/pybind/pybind11.git
[submodule "backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3"]
path = backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3
url = https://github.com/foss-xtensa/nnlib-FusionG3/
url = https://github.com/foss-xtensa/nnlib-FusionG3.git
[submodule "third-party/ao"]
path = third-party/ao
url = https://github.com/pytorch/ao.git
26 changes: 20 additions & 6 deletions backends/cadence/aot/functions_fusion_g3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@
- op: div.out
kernels:
- arg_meta: null
kernel_name: torch::executor::div_out
kernel_name: cadence::impl::G3::div_out

- op: div.out_mode
kernels:
- arg_meta: null
kernel_name: torch::executor::div_out_mode
kernel_name: cadence::impl::G3::div_out_mode

- op: embedding.out
kernels:
Expand All @@ -71,7 +71,6 @@
kernels:
- arg_meta: null
kernel_name: cadence::impl::G3::mul_out

- op: mul.Scalar_out
kernels:
- arg_meta: null
Expand All @@ -80,7 +79,7 @@
- op: permute_copy.out
kernels:
- arg_meta: null
kernel_name: torch::executor::permute_copy_out
kernel_name: cadence::impl::G3::permute_copy_out

- op: sigmoid.out
kernels:
Expand All @@ -90,7 +89,7 @@
- op: slice_copy.Tensor_out
kernels:
- arg_meta: null
kernel_name: torch::executor::slice_copy_Tensor_out
kernel_name: cadence::impl::G3::slice_copy_Tensor_out

- op: split_with_sizes_copy.out
kernels:
Expand All @@ -100,7 +99,12 @@
- op: sub.out
kernels:
- arg_meta: null
kernel_name: torch::executor::sub_out
kernel_name: cadence::impl::G3::sub_out

- op: sub.Scalar_out
kernels:
- arg_meta: null
kernel_name: cadence::impl::G3::sub_scalar_out

- op: view_copy.out
kernels:
Expand All @@ -117,6 +121,16 @@
- arg_meta: null
kernel_name: cadence::impl::G3::native_layer_norm_out

- op: mean.out
kernels:
- arg_meta: null
kernel_name: cadence::impl::G3::mean_dim_out

- op: exp.out
kernels:
- arg_meta: null
kernel_name: cadence::impl::G3::exp_out

# custom ops
- func: cadence::quantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
Expand Down
7 changes: 7 additions & 0 deletions backends/cadence/fusion_g3/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ set(_aten_ops__srcs
"${CMAKE_CURRENT_SOURCE_DIR}/op_native_layer_norm.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/op_quantize.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/op_dequantize.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/op_sub.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/op_div.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/op_mean.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/op_slice_copy.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/op_permute_copy.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/op_exp.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_bmm.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_clone.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_div.cpp"
Expand All @@ -51,6 +57,7 @@ set(_aten_ops__srcs
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_where.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/dtype_util.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/normalization_ops_util.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp"
)
add_library(aten_ops_cadence ${_aten_ops__srcs})
target_link_libraries(aten_ops_cadence PUBLIC executorch)
Expand Down
6 changes: 4 additions & 2 deletions backends/cadence/fusion_g3/operators/op_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Tensor& add_out(
ScalarType common_type =
executorch::runtime::promoteTypes(a.scalar_type(), b.scalar_type());

#ifdef OP_ARG_CHECK
// Check Common Dtype
ET_KERNEL_CHECK(
ctx,
Expand All @@ -62,12 +63,12 @@ Tensor& add_out(
torch::executor::resize_to_broadcast_target_size(a, b, out) == Error::Ok,
InvalidArgument,
out);
#endif

// Compute Dtype
ScalarType compute_type =
torch::executor::native::utils::get_compute_type(common_type);

// @lint-ignore CLANGTIDY facebook-hte-CArray
static constexpr const char op_name[] = "add.out";

int kTensorDimensionLimit = 5;
Expand Down Expand Up @@ -253,6 +254,7 @@ Tensor& add_scalar_out(
torch::executor::native::utils::promote_type_with_scalar(
a.scalar_type(), b);

#ifdef OP_ARG_CHECK
// Check Common Dtype
ET_KERNEL_CHECK(
ctx,
Expand All @@ -276,7 +278,7 @@ Tensor& add_scalar_out(
executorch::runtime::resize_tensor(out, a.sizes()) == Error::Ok,
InvalidArgument,
out);

#endif
// Compute Dtype
ScalarType compute_type =
torch::executor::native::utils::get_compute_type(common_type);
Expand Down
108 changes: 36 additions & 72 deletions backends/cadence/fusion_g3/operators/op_cat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,18 @@
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/backends/cadence/fusion_g3/operators/operators.h>
#include <executorch/backends/cadence/fusion_g3/operators/xt_utils.h>

#include <cstring>

#include <xa_nnlib_kernels_api.h>

#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
#include <executorch/kernels/portable/cpu/util/copy_ops_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>

using ::executorch::aten::ArrayRef;
using ::executorch::aten::ScalarType;
using ::executorch::aten::Tensor;
using ::executorch::runtime::Error;
Expand All @@ -23,7 +28,6 @@ using ::executorch::runtime::KernelRuntimeContext;
* updated to have support for the data types below, these can be removed and
* the operator needs to be updated accordingly
*/
enum datatype { Ushort = 20, Uint = 23 };

namespace cadence {
namespace impl {
Expand All @@ -32,20 +36,22 @@ namespace native {

Tensor& cat_out(
KernelRuntimeContext& ctx,
exec_aten::ArrayRef<Tensor> tensors,
ArrayRef<Tensor> tensors,
int64_t dim,
Tensor& out) {
if (dim < 0) {
dim += out.dim();
}

int kTensorDimensionLimit = executorch::runtime::kTensorDimensionLimit;

#ifdef OP_ARG_CHECK
ET_KERNEL_CHECK(
ctx,
torch::executor::check_cat_args(tensors, dim, out),
InvalidArgument,
out);

int kTensorDimensionLimit = executorch::runtime::kTensorDimensionLimit;
Tensor::SizesType expected_out_size[kTensorDimensionLimit];
size_t expected_out_dim = 0;
torch::executor::get_cat_out_target_size(
Expand All @@ -57,14 +63,28 @@ Tensor& cat_out(
out, {expected_out_size, expected_out_dim}) == Error::Ok,
InvalidArgument,
out);
#endif
// Special handling when all inputs are 1D-empty tensors, for ATen
// consistency. In that case, just return a 1D-empty tensor without checking
// dim.
bool all_1d_empty = true;
for (size_t i = 0; i < tensors.size(); ++i) {
if (tensors[i].numel() != 0 || tensors[i].dim() != 1) {
all_1d_empty = false;
break;
}
}
if (all_1d_empty) {
return out;
}

const signed char* inp_tensors[tensors.size()];
const int* inp_tensors_shapes[tensors.size()];

int inp_shapes_size[tensors.size()];

int temp_sizes[tensors.size()][kTensorDimensionLimit];
exec_aten::ArrayRef<Tensor::SizesType> temp_size;
ArrayRef<Tensor::SizesType> temp_size;

for (int i = 0; i < tensors.size(); i++) {
inp_tensors[i] = tensors[i].const_data_ptr<signed char>();
Expand All @@ -79,88 +99,32 @@ Tensor& cat_out(

signed char* out_data = out.mutable_data_ptr<signed char>();

const exec_aten::ArrayRef<Tensor::SizesType> out_size = out.sizes();
const ArrayRef<Tensor::SizesType> out_size = out.sizes();
int out_shapes[kTensorDimensionLimit];
for (int i = 0; i < out_size.size(); i++) // output shapes
{
out_shapes[i] = out_size[i];
}

if (out.scalar_type() == ScalarType::Int) {
xa_nn_cat(
out_data,
out_shapes,
inp_tensors,
inp_tensors_shapes,
inp_shapes_size[0],
tensors.size(),
(int)dim,
sizeof(int));
} else if (out.scalar_type() == ScalarType::Short) {
xa_nn_cat(
out_data,
out_shapes,
inp_tensors,
inp_tensors_shapes,
inp_shapes_size[0],
tensors.size(),
(int)dim,
sizeof(short));
} else if (out.scalar_type() == ScalarType::Char) {
xa_nn_cat(
out_data,
out_shapes,
inp_tensors,
inp_tensors_shapes,
inp_shapes_size[0],
tensors.size(),
(int)dim,
sizeof(char));
} else if (out.scalar_type() == (ScalarType)Uint) {
xa_nn_cat(
out_data,
out_shapes,
inp_tensors,
inp_tensors_shapes,
inp_shapes_size[0],
tensors.size(),
(int)dim,
sizeof(int));
} else if (out.scalar_type() == (ScalarType)Ushort) {
xa_nn_cat(
if ((out.scalar_type() == ScalarType::Int) ||
(out.scalar_type() == ScalarType::Short) ||
(out.scalar_type() == ScalarType::Char) ||
(out.scalar_type() == ScalarType::UInt32) ||
(out.scalar_type() == ScalarType::UInt16) ||
(out.scalar_type() == ScalarType::Byte)) {
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_cat,
out_data,
out_shapes,
inp_tensors,
inp_tensors_shapes,
inp_shapes_size[0],
tensors.size(),
(int)dim,
sizeof(short));
} else if (out.scalar_type() == ScalarType::Byte) {
xa_nn_cat(
out_data,
out_shapes,
inp_tensors,
inp_tensors_shapes,
inp_shapes_size[0],
tensors.size(),
(int)dim,
sizeof(char));

get_element_size(out.scalar_type()));
} else {
// Special handling when all inputs are 1D-empty tensors, for ATen
// consistency. In that case, just return a 1D-empty tensor without checking
// dim.
bool all_1d_empty = true;
for (size_t i = 0; i < tensors.size(); ++i) {
if (tensors[i].numel() != 0 || tensors[i].dim() != 1) {
all_1d_empty = false;
break;
}
}
if (all_1d_empty) {
return out;
}
const size_t outer = executorch::runtime::getLeadingDims(out, dim);
const size_t dim_stride = executorch::runtime::getTrailingDims(out, dim);
const size_t ninputs = tensors.size();
Expand Down
Loading
Loading