Skip to content

Separate buck targets per operator. #7314

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 95 additions & 24 deletions backends/cadence/fusion_g3/operators/op_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,27 @@
#include <executorch/runtime/platform/assert.h>
#include <xa_nnlib_kernels_api.h>

using exec_aten::Scalar;
using exec_aten::ScalarType;
using exec_aten::Tensor;
using executorch::runtime::canCast;
using torch::executor::Error;
using torch::executor::KernelRuntimeContext;
using ::executorch::aten::Scalar;
using ::executorch::aten::ScalarType;
using ::executorch::aten::Tensor;
using ::executorch::runtime::canCast;
using ::executorch::runtime::Error;
using ::executorch::runtime::KernelRuntimeContext;

namespace cadence {
namespace impl {
namespace G3 {
namespace native {

// Invoke an nnlib kernel and, if it reports a nonzero status, abort the
// enclosing operator with InvalidArgument (ET_KERNEL_CHECK_MSG returns `out`
// from the surrounding function on failure).
//
// Wrapped in do { ... } while (false) so the macro behaves as a single
// statement — safe after an unbraced `if` — and so the local status variable
// neither collides when the macro is used twice in one scope nor shadows a
// caller-level `ret`.
#define XT_KERNEL_CHECK(ctx, out, kernel, ...)                     \
  do {                                                             \
    const auto xt_kernel_check_ret__ = kernel(__VA_ARGS__);        \
    ET_KERNEL_CHECK_MSG(                                           \
        ctx,                                                       \
        xt_kernel_check_ret__ == 0,                                \
        InvalidArgument,                                           \
        out,                                                       \
        "Failed to run kernel: " #kernel "(" #__VA_ARGS__ ")");    \
  } while (false)

Tensor& add_out(
KernelRuntimeContext& ctx,
const Tensor& a,
Expand Down Expand Up @@ -121,13 +130,30 @@ Tensor& add_out(
torch::executor::native::utils::extract_scalar(alpha, &alpha_val);

if ((a.numel() == 1) && (alpha_val == 1)) {
xa_nn_elm_add_scalar_32x32_32(
out_data, inp2_data, inp1_data[0], alpha_val, out.numel());
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_scalar_32x32_32,
out_data,
inp2_data,
inp1_data[0],
alpha_val,
out.numel());
} else if (b.numel() == 1) {
xa_nn_elm_add_scalar_32x32_32(
out_data, inp1_data, inp2_data[0], alpha_val, out.numel());
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_scalar_32x32_32,
out_data,
inp1_data,
inp2_data[0],
alpha_val,
out.numel());
} else if (broadcast) {
xa_nn_elm_add_broadcast_5D_32x32_32(
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_broadcast_5D_32x32_32,
out_data,
out_shape,
inp1_data,
Expand All @@ -137,8 +163,15 @@ Tensor& add_out(
max_dim,
alpha_val);
} else {
xa_nn_elm_add_32x32_32(
out_data, inp1_data, inp2_data, alpha_val, out.numel());
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_32x32_32,
out_data,
inp1_data,
inp2_data,
alpha_val,
out.numel());
}
} else if ((compute_type == ScalarType::Float) && (optimized)) {
const float* const inp1_data = a.const_data_ptr<float>();
Expand All @@ -149,13 +182,30 @@ Tensor& add_out(
torch::executor::native::utils::extract_scalar(alpha, &alpha_val);

if ((a.numel() == 1) && (alpha_val == 1.0)) {
xa_nn_elm_add_scalar_f32xf32_f32(
out_data, inp2_data, inp1_data[0], alpha_val, out.numel());
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_scalar_f32xf32_f32,
out_data,
inp2_data,
inp1_data[0],
alpha_val,
out.numel());
} else if (b.numel() == 1) {
xa_nn_elm_add_scalar_f32xf32_f32(
out_data, inp1_data, inp2_data[0], alpha_val, out.numel());
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_scalar_f32xf32_f32,
out_data,
inp1_data,
inp2_data[0],
alpha_val,
out.numel());
} else if (broadcast) {
xa_nn_elm_add_broadcast_5D_f32xf32_f32(
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_broadcast_5D_f32xf32_f32,
out_data,
out_shape,
inp1_data,
Expand All @@ -165,8 +215,15 @@ Tensor& add_out(
max_dim,
alpha_val);
} else {
xa_nn_elm_add_f32xf32_f32(
out_data, inp1_data, inp2_data, alpha_val, out.numel());
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_f32xf32_f32,
out_data,
inp1_data,
inp2_data,
alpha_val,
out.numel());
}
} else {
ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
Expand Down Expand Up @@ -242,8 +299,15 @@ Tensor& add_scalar_out(

int* const out_data = out.mutable_data_ptr<int>();

xa_nn_elm_add_scalar_32x32_32(
out_data, inp1_data, inp2_val, alpha_val, out.numel());
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_scalar_32x32_32,
out_data,
inp1_data,
inp2_val,
alpha_val,
out.numel());

} else if (compute_type == ScalarType::Float) {
const float* const inp1_data = a.const_data_ptr<float>();
Expand All @@ -255,8 +319,15 @@ Tensor& add_scalar_out(

float* const out_data = out.mutable_data_ptr<float>();

xa_nn_elm_add_scalar_f32xf32_f32(
out_data, inp1_data, inp2_val, alpha_val, out.numel());
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_scalar_f32xf32_f32,
out_data,
inp1_data,
inp2_val,
alpha_val,
out.numel());

} else {
ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
Expand Down
65 changes: 46 additions & 19 deletions backends/cadence/fusion_g3/operators/targets.bzl
Original file line number Diff line number Diff line change
@@ -1,6 +1,45 @@
load("@fbsource//tools/build_defs:platform_defs.bzl", "CXX")
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

def define_operator(name: str, deps: list[str] | None = None) -> None:
    """Declare the per-operator cxx_library target "op_<name>".

    Args:
        name: bare operator name, e.g. "add" (source file is op_<name>.cpp).
        deps: optional extra dependencies for this operator; they are placed
            ahead of the common dependency set.
    """
    target = "op_{}".format(name)

    # Dependencies required by every operator target.
    shared_deps = [
        "//executorch/kernels/portable/cpu/util:all_deps",
        "//executorch/kernels/portable/cpu/pattern:all_deps",
        "//executorch/runtime/kernel:kernel_includes",
        "//executorch/kernels/portable/cpu:scalar_utils",
        "fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib_common",
        "fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib",
    ]

    runtime.cxx_library(
        name = target,
        srcs = [target + ".cpp"],
        platforms = CXX,
        visibility = [
            "//executorch/backends/cadence/...",
            "@EXECUTORCH_CLIENTS",
        ],
        deps = (deps or []) + shared_deps,
        exported_deps = [
            ":operators_header",
        ],
    )

# Operator names that get an individual "op_<name>" buck target; the list is
# consumed by the define_operator() loop in define_common_targets().
OPERATORS = [
    "add",
    "cat",
    "dequantize",
    "mul",
    "native_layer_norm",
    "quantize",
    "softmax",
]

def define_common_targets():
"""Defines targets that should be shared between fbcode and xplat.

Expand All @@ -11,28 +50,16 @@ def define_common_targets():
# Define build targets for all operators registered in the tables above.

runtime.cxx_library(
name = "cadence_g3_ops",
srcs = glob([
"*.cpp",
]),
exported_headers = glob([
"*.h",
]),
platforms = CXX,
deps = [
"//executorch/kernels/portable/cpu/util:all_deps",
"//executorch/kernels/portable/cpu/pattern:all_deps",
"//executorch/runtime/kernel:kernel_includes",
"//executorch/kernels/portable/cpu:scalar_utils",
"fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib_common",
"fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib",
],
name = "operators_header",
exported_headers = ["operators.h"],
visibility = [
"//executorch/backends/cadence/...",
"@EXECUTORCH_CLIENTS",
],
exported_deps = [
"fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib_common",
"fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib",
"//executorch/runtime/core/exec_aten:lib",
"//executorch/runtime/kernel:kernel_runtime_context",
],
)

for op in OPERATORS:
define_operator(op)
33 changes: 25 additions & 8 deletions backends/cadence/fusion_g3/operators/tests/test_op_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include <stdio.h>

#include <executorch/backends/cadence/fusion_g3/operators/operators.h>
#include <executorch/kernels/test/TestUtil.h>
#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
Expand All @@ -24,24 +26,19 @@ namespace {
using ::executorch::aten::Scalar;
using ::executorch::aten::ScalarType;
using ::executorch::aten::Tensor;
using ::executorch::aten::TensorImpl;
using ::executorch::runtime::Error;
using ::executorch::runtime::KernelRuntimeContext;
using ::executorch::runtime::runtime_init;
using ::executorch::runtime::testing::TensorFactory;
using ::testing::Test;

class FusionG3OperatorTest : public Test {
class FusionG3OperatorTest : public OperatorTest {
public:
void SetUp() override {
runtime_init();
}

protected:
Tensor&
add_out(const Tensor& a, const Tensor& b, const Scalar& alpha, Tensor& out) {
return cadence::impl::G3::native::add_out(context_, a, b, alpha, out);
}

KernelRuntimeContext context_;
};

TEST_F(FusionG3OperatorTest, TwoDimFloatTensorAddTest) {
Expand Down Expand Up @@ -77,6 +74,26 @@ TEST_F(FusionG3OperatorTest, AddWithBroadcastTest) {
EXPECT_TENSOR_EQ(out, tf.full(size_a, 2));
}

// Verifies that a failing nnlib kernel call surfaces as a kernel failure
// instead of crashing — presumably exercising the XT_KERNEL_CHECK path added
// to op_add.cpp; confirm the null data pointer makes the kernel return
// nonzero rather than dereference it.
TEST_F(FusionG3OperatorTest, KernelCheckTest) {
TensorFactory<ScalarType::Float> tf;
// Broadcast add.
const std::vector<TensorImpl::SizesType> sizeOfA{1, 3, 2, 4}, sizeOfB{2, 4};
const Tensor b = tf.ones(sizeOfB);
Tensor out = tf.zeros(sizeOfA);
// Create a null tensor to force kernel check failure.
// Reuses b's sizes and dim_order but substitutes a null data pointer.
TensorImpl nullTensorImpl(
b.scalar_type(),
b.dim(),
const_cast<TensorImpl::SizesType*>(b.sizes().data()),
// Use nullptr to force kernel check failure.
/*data=*/nullptr,
const_cast<TensorImpl::DimOrderType*>(b.dim_order().data()));
// nullTensor borrows nullTensorImpl's storage; both are stack-local, so the
// lifetime covers the duration of this test.
Tensor nullTensor(&nullTensorImpl);

// add_out is the fixture helper forwarding to cadence::impl::G3::native::add_out.
ET_EXPECT_KERNEL_FAILURE(
context_, add_out(tf.ones(sizeOfA), nullTensor, 1, out));
}

} // namespace
} // namespace native
} // namespace G3
Expand Down
3 changes: 3 additions & 0 deletions backends/cadence/runtime/TARGETS
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
load(":targets.bzl", "define_common_targets")
load("@fbcode_macros//build_defs:python_library.bzl", "python_library")

oncall("odai_jarvis")
Expand All @@ -22,3 +23,5 @@ python_library(
"//executorch/exir:lib",
],
)

define_common_targets()
Loading