Skip to content

Commit 6941d46

Browse files
authored
FusionG3 operators. (#7315)
* Allow backends/cadence to use TestUtil.h. (#7304) Summary: Create a separate buck target for `TestUtil.h` that can be used by backends. The current `test_util` target has dependencies that don't compile for xtensa toolchain. Reviewed By: zonglinpeng Differential Revision: D67128600 * Xtensa ISS PAL layer for logging/timing. (#7311) Summary: Overrides for `et_pal*` weak symbols for logging/timing with xtensa ISS. Reviewed By: zonglinpeng Differential Revision: D67128599 * Use macro `XT_KERNEL_CHECK` to handle errors returned by nnlib. (#7312) Summary: Use ET_KERNEL_CHECK to detect error codes returned by xa_nn* library calls. Reviewed By: zonglinpeng Differential Revision: D67128597 * Separate buck targets per operator. (#7314) Summary: Keep targets separate so we only compile the operators we need. Reviewed By: zonglinpeng Differential Revision: D67128598 * FusionG3 operators. (#7315) Summary: Cleanup header order and `using` declarations for operators to match style guide. Reviewed By: zonglinpeng Differential Revision: D67128499
1 parent 61b9e1b commit 6941d46

File tree

14 files changed

+381
-117
lines changed

14 files changed

+381
-117
lines changed

backends/cadence/fusion_g3/operators/op_add.cpp

Lines changed: 99 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,37 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#include <executorch/backends/cadence/fusion_g3/operators/operators.h>
10+
11+
#include <xa_nnlib_kernels_api.h>
12+
913
#include <executorch/kernels/portable/cpu/scalar_utils.h>
1014
#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
1115
#include <executorch/kernels/portable/cpu/util/kernel_ops_util.h>
1216
#include <executorch/runtime/kernel/kernel_includes.h>
1317
#include <executorch/runtime/platform/assert.h>
14-
#include <xa_nnlib_kernels_api.h>
1518

16-
using exec_aten::Scalar;
17-
using exec_aten::ScalarType;
18-
using exec_aten::Tensor;
19-
using executorch::runtime::canCast;
20-
using torch::executor::Error;
21-
using torch::executor::KernelRuntimeContext;
19+
using ::executorch::aten::Scalar;
20+
using ::executorch::aten::ScalarType;
21+
using ::executorch::aten::Tensor;
22+
using ::executorch::runtime::canCast;
23+
using ::executorch::runtime::Error;
24+
using ::executorch::runtime::KernelRuntimeContext;
2225

2326
namespace cadence {
2427
namespace impl {
2528
namespace G3 {
2629
namespace native {
2730

31+
#define XT_KERNEL_CHECK(ctx, out, kernel, ...) \
32+
const auto ret = kernel(__VA_ARGS__); \
33+
ET_KERNEL_CHECK_MSG( \
34+
ctx, \
35+
ret == 0, \
36+
InvalidArgument, \
37+
out, \
38+
"Failed to run kernel: " #kernel "(" #__VA_ARGS__ ")");
39+
2840
Tensor& add_out(
2941
KernelRuntimeContext& ctx,
3042
const Tensor& a,
@@ -121,13 +133,30 @@ Tensor& add_out(
121133
torch::executor::native::utils::extract_scalar(alpha, &alpha_val);
122134

123135
if ((a.numel() == 1) && (alpha_val == 1)) {
124-
xa_nn_elm_add_scalar_32x32_32(
125-
out_data, inp2_data, inp1_data[0], alpha_val, out.numel());
136+
XT_KERNEL_CHECK(
137+
ctx,
138+
out,
139+
xa_nn_elm_add_scalar_32x32_32,
140+
out_data,
141+
inp2_data,
142+
inp1_data[0],
143+
alpha_val,
144+
out.numel());
126145
} else if (b.numel() == 1) {
127-
xa_nn_elm_add_scalar_32x32_32(
128-
out_data, inp1_data, inp2_data[0], alpha_val, out.numel());
146+
XT_KERNEL_CHECK(
147+
ctx,
148+
out,
149+
xa_nn_elm_add_scalar_32x32_32,
150+
out_data,
151+
inp1_data,
152+
inp2_data[0],
153+
alpha_val,
154+
out.numel());
129155
} else if (broadcast) {
130-
xa_nn_elm_add_broadcast_5D_32x32_32(
156+
XT_KERNEL_CHECK(
157+
ctx,
158+
out,
159+
xa_nn_elm_add_broadcast_5D_32x32_32,
131160
out_data,
132161
out_shape,
133162
inp1_data,
@@ -137,8 +166,15 @@ Tensor& add_out(
137166
max_dim,
138167
alpha_val);
139168
} else {
140-
xa_nn_elm_add_32x32_32(
141-
out_data, inp1_data, inp2_data, alpha_val, out.numel());
169+
XT_KERNEL_CHECK(
170+
ctx,
171+
out,
172+
xa_nn_elm_add_32x32_32,
173+
out_data,
174+
inp1_data,
175+
inp2_data,
176+
alpha_val,
177+
out.numel());
142178
}
143179
} else if ((compute_type == ScalarType::Float) && (optimized)) {
144180
const float* const inp1_data = a.const_data_ptr<float>();
@@ -149,13 +185,30 @@ Tensor& add_out(
149185
torch::executor::native::utils::extract_scalar(alpha, &alpha_val);
150186

151187
if ((a.numel() == 1) && (alpha_val == 1.0)) {
152-
xa_nn_elm_add_scalar_f32xf32_f32(
153-
out_data, inp2_data, inp1_data[0], alpha_val, out.numel());
188+
XT_KERNEL_CHECK(
189+
ctx,
190+
out,
191+
xa_nn_elm_add_scalar_f32xf32_f32,
192+
out_data,
193+
inp2_data,
194+
inp1_data[0],
195+
alpha_val,
196+
out.numel());
154197
} else if (b.numel() == 1) {
155-
xa_nn_elm_add_scalar_f32xf32_f32(
156-
out_data, inp1_data, inp2_data[0], alpha_val, out.numel());
198+
XT_KERNEL_CHECK(
199+
ctx,
200+
out,
201+
xa_nn_elm_add_scalar_f32xf32_f32,
202+
out_data,
203+
inp1_data,
204+
inp2_data[0],
205+
alpha_val,
206+
out.numel());
157207
} else if (broadcast) {
158-
xa_nn_elm_add_broadcast_5D_f32xf32_f32(
208+
XT_KERNEL_CHECK(
209+
ctx,
210+
out,
211+
xa_nn_elm_add_broadcast_5D_f32xf32_f32,
159212
out_data,
160213
out_shape,
161214
inp1_data,
@@ -165,8 +218,15 @@ Tensor& add_out(
165218
max_dim,
166219
alpha_val);
167220
} else {
168-
xa_nn_elm_add_f32xf32_f32(
169-
out_data, inp1_data, inp2_data, alpha_val, out.numel());
221+
XT_KERNEL_CHECK(
222+
ctx,
223+
out,
224+
xa_nn_elm_add_f32xf32_f32,
225+
out_data,
226+
inp1_data,
227+
inp2_data,
228+
alpha_val,
229+
out.numel());
170230
}
171231
} else {
172232
ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
@@ -242,8 +302,15 @@ Tensor& add_scalar_out(
242302

243303
int* const out_data = out.mutable_data_ptr<int>();
244304

245-
xa_nn_elm_add_scalar_32x32_32(
246-
out_data, inp1_data, inp2_val, alpha_val, out.numel());
305+
XT_KERNEL_CHECK(
306+
ctx,
307+
out,
308+
xa_nn_elm_add_scalar_32x32_32,
309+
out_data,
310+
inp1_data,
311+
inp2_val,
312+
alpha_val,
313+
out.numel());
247314

248315
} else if (compute_type == ScalarType::Float) {
249316
const float* const inp1_data = a.const_data_ptr<float>();
@@ -255,8 +322,15 @@ Tensor& add_scalar_out(
255322

256323
float* const out_data = out.mutable_data_ptr<float>();
257324

258-
xa_nn_elm_add_scalar_f32xf32_f32(
259-
out_data, inp1_data, inp2_val, alpha_val, out.numel());
325+
XT_KERNEL_CHECK(
326+
ctx,
327+
out,
328+
xa_nn_elm_add_scalar_f32xf32_f32,
329+
out_data,
330+
inp1_data,
331+
inp2_val,
332+
alpha_val,
333+
out.numel());
260334

261335
} else {
262336
ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {

backends/cadence/fusion_g3/operators/op_cat.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,17 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#include <cstring>
10+
11+
#include <xa_nnlib_kernels_api.h>
12+
913
#include <executorch/kernels/portable/cpu/util/copy_ops_util.h>
1014
#include <executorch/runtime/kernel/kernel_includes.h>
11-
#include <xa_nnlib_kernels_api.h>
12-
#include <cstring>
1315

14-
using exec_aten::Scalar;
15-
using exec_aten::ScalarType;
16-
using exec_aten::Tensor;
17-
using torch::executor::Error;
18-
using torch::executor::KernelRuntimeContext;
16+
using ::executorch::aten::ScalarType;
17+
using ::executorch::aten::Tensor;
18+
using ::executorch::runtime::Error;
19+
using ::executorch::runtime::KernelRuntimeContext;
1920

2021
/* ScalarType in ExecuTorch does not have support for the data types below.
2122
* So, creating a placeholder for these data types. Once, ScalarTypes is
@@ -194,4 +195,4 @@ Tensor& cat_out(
194195
} // namespace native
195196
} // namespace G3
196197
} // namespace impl
197-
} // namespace cadence
198+
} // namespace cadence

backends/cadence/fusion_g3/operators/op_dequantize.cpp

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,20 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9-
#include <executorch/kernels/portable/cpu/util/reduce_util.h>
10-
#include <executorch/runtime/kernel/kernel_includes.h>
11-
#include <xa_nnlib_kernels_api.h>
129
#include <algorithm>
1310
#include <cinttypes>
1411
#include <cmath>
1512

16-
using exec_aten::Scalar;
17-
using exec_aten::ScalarType;
18-
using exec_aten::Tensor;
19-
using torch::executor::Error;
20-
using torch::executor::KernelRuntimeContext;
13+
#include <xa_nnlib_kernels_api.h>
14+
15+
#include <executorch/kernels/portable/cpu/util/reduce_util.h>
16+
#include <executorch/runtime/kernel/kernel_includes.h>
17+
18+
using ::executorch::aten::Scalar;
19+
using ::executorch::aten::ScalarType;
20+
using ::executorch::aten::Tensor;
21+
using ::executorch::runtime::Error;
22+
using ::executorch::runtime::KernelRuntimeContext;
2123

2224
template <typename T>
2325
using optional = exec_aten::optional<T>;
@@ -185,7 +187,7 @@ void dequantize_impl(
185187
if (axis == NULL) {
186188
// calculate the dequantized output, cast scale to float to match fbgemm
187189
// behavior
188-
#define ASYM_DEQUANTIZE_IMPL_TESNOR(IN_CTYPE, OUT_CTYPE, out_dtype) \
190+
#define ASYM_DEQUANTIZE_IMPL_TENSOR(IN_CTYPE, OUT_CTYPE, out_dtype) \
189191
case ScalarType::out_dtype: { \
190192
/* Hoist these function calls out of our inner loop because they might not \
191193
* get inlined without LTO, particularly in ATen mode. */ \
@@ -201,7 +203,7 @@ void dequantize_impl(
201203
#define ASYM_CALCULATE_INT_TYPE_TENSOR(IN_CTYPE, in_dtype) \
202204
case ScalarType::in_dtype: \
203205
switch (out.scalar_type()) { \
204-
ET_FORALL_FLOAT_TYPES_WITH(IN_CTYPE, ASYM_DEQUANTIZE_IMPL_TESNOR); \
206+
ET_FORALL_FLOAT_TYPES_WITH(IN_CTYPE, ASYM_DEQUANTIZE_IMPL_TENSOR); \
205207
default: \
206208
ET_CHECK_MSG( \
207209
false, \
@@ -219,7 +221,7 @@ void dequantize_impl(
219221
static_cast<int8_t>(input.scalar_type()));
220222
}
221223
#undef ASYM_CALCULATE_INT_TYPE_TENSOR
222-
#undef ASYM_DEQUANTIZE_IMPL_TESNOR
224+
#undef ASYM_DEQUANTIZE_IMPL_TENSOR
223225
} else {
224226
// a list contains all dimensions except axis
225227
int64_t dims[input.dim() - 1];

backends/cadence/fusion_g3/operators/op_mul.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,19 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#include <xa_nnlib_kernels_api.h>
10+
911
#include <executorch/kernels/portable/cpu/scalar_utils.h>
1012
#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
1113
#include <executorch/runtime/kernel/kernel_includes.h>
1214
#include <executorch/runtime/platform/assert.h>
13-
#include <xa_nnlib_kernels_api.h>
1415

15-
using exec_aten::Scalar;
16-
using exec_aten::ScalarType;
17-
using exec_aten::Tensor;
18-
using executorch::runtime::canCast;
19-
using torch::executor::Error;
20-
using torch::executor::KernelRuntimeContext;
16+
using ::executorch::aten::Scalar;
17+
using ::executorch::aten::ScalarType;
18+
using ::executorch::aten::Tensor;
19+
using ::executorch::runtime::canCast;
20+
using ::executorch::runtime::Error;
21+
using ::executorch::runtime::KernelRuntimeContext;
2122

2223
namespace cadence {
2324
namespace impl {
@@ -238,4 +239,4 @@ Tensor& mul_scalar_out(
238239
} // namespace native
239240
} // namespace G3
240241
} // namespace impl
241-
} // namespace cadence
242+
} // namespace cadence

backends/cadence/fusion_g3/operators/op_native_layer_norm.cpp

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,20 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#include <cmath>
10+
#include <tuple>
11+
12+
#include <xa_nnlib_kernels_api.h>
13+
914
#include <executorch/kernels/portable/cpu/util/normalization_ops_util.h>
1015
#include <executorch/kernels/portable/cpu/vec_ops.h>
1116
#include <executorch/runtime/kernel/kernel_includes.h>
12-
#include <xa_nnlib_kernels_api.h>
13-
#include <cmath>
14-
#include <tuple>
1517

16-
using Tensor = exec_aten::Tensor;
17-
using ScalarType = exec_aten::ScalarType;
18-
using IntArrayRef = exec_aten::ArrayRef<int64_t>;
19-
using torch::executor::Error;
20-
using torch::executor::KernelRuntimeContext;
18+
using ::executorch::aten::IntArrayRef;
19+
using ::executorch::aten::ScalarType;
20+
using ::executorch::aten::Tensor;
21+
using ::executorch::runtime::Error;
22+
using ::executorch::runtime::KernelRuntimeContext;
2123

2224
namespace cadence {
2325
namespace impl {
@@ -255,4 +257,4 @@ std::tuple<Tensor&, Tensor&, Tensor&> native_layer_norm_out(
255257
} // namespace native
256258
} // namespace G3
257259
} // namespace impl
258-
} // namespace cadence
260+
} // namespace cadence

0 commit comments

Comments
 (0)