Skip to content

Commit 6941d46

Browse files
authored
FusionG3 operators. (#7315)
* Allow backends/cadence to use TestUtil.h. (#7304) Summary: Create a separate buck target for `TestUtil.h` that can be used by backends. The current `test_util` target has dependencies that don't compile for xtensa toolchain. Reviewed By: zonglinpeng Differential Revision: D67128600 * Xtensa ISS PAL layer for logging/timing. (#7311) Summary: Overrides for `et_pal*` weak symbols for logging/timing with xtensa ISS. Reviewed By: zonglinpeng Differential Revision: D67128599 * Use macro `XT_KERNEL_CHECK` to handle errors returned by nnlib. (#7312) Summary: Use ET_KERNEL_CHECK to detect error codes returned by xa_nn* library calls. Reviewed By: zonglinpeng Differential Revision: D67128597 * Separate buck targets per operator. (#7314) Summary: Keep targets separate so we only compile the operators we need. Reviewed By: zonglinpeng Differential Revision: D67128598 * FusionG3 operators. (#7315) Summary: Cleanup header order and `using` declarations for operators to match style guide. Reviewed By: zonglinpeng Differential Revision: D67128499
1 parent 61b9e1b commit 6941d46

File tree

14 files changed

+381
-117
lines changed

14 files changed

+381
-117
lines changed

backends/cadence/fusion_g3/operators/op_add.cpp

Lines changed: 99 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,37 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#include <executorch/backends/cadence/fusion_g3/operators/operators.h>
10+
11+
#include <xa_nnlib_kernels_api.h>
12+
913
#include <executorch/kernels/portable/cpu/scalar_utils.h>
1014
#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
1115
#include <executorch/kernels/portable/cpu/util/kernel_ops_util.h>
1216
#include <executorch/runtime/kernel/kernel_includes.h>
1317
#include <executorch/runtime/platform/assert.h>
14-
#include <xa_nnlib_kernels_api.h>
1518

16-
using exec_aten::Scalar;
17-
using exec_aten::ScalarType;
18-
using exec_aten::Tensor;
19-
using executorch::runtime::canCast;
20-
using torch::executor::Error;
21-
using torch::executor::KernelRuntimeContext;
19+
using ::executorch::aten::Scalar;
20+
using ::executorch::aten::ScalarType;
21+
using ::executorch::aten::Tensor;
22+
using ::executorch::runtime::canCast;
23+
using ::executorch::runtime::Error;
24+
using ::executorch::runtime::KernelRuntimeContext;
2225

2326
namespace cadence {
2427
namespace impl {
2528
namespace G3 {
2629
namespace native {
2730

31+
#define XT_KERNEL_CHECK(ctx, out, kernel, ...) \
32+
const auto ret = kernel(__VA_ARGS__); \
33+
ET_KERNEL_CHECK_MSG( \
34+
ctx, \
35+
ret == 0, \
36+
InvalidArgument, \
37+
out, \
38+
"Failed to run kernel: " #kernel "(" #__VA_ARGS__ ")");
39+
2840
Tensor& add_out(
2941
KernelRuntimeContext& ctx,
3042
const Tensor& a,
@@ -121,13 +133,30 @@ Tensor& add_out(
121133
torch::executor::native::utils::extract_scalar(alpha, &alpha_val);
122134

123135
if ((a.numel() == 1) && (alpha_val == 1)) {
124-
xa_nn_elm_add_scalar_32x32_32(
125-
out_data, inp2_data, inp1_data[0], alpha_val, out.numel());
136+
XT_KERNEL_CHECK(
137+
ctx,
138+
out,
139+
xa_nn_elm_add_scalar_32x32_32,
140+
out_data,
141+
inp2_data,
142+
inp1_data[0],
143+
alpha_val,
144+
out.numel());
126145
} else if (b.numel() == 1) {
127-
xa_nn_elm_add_scalar_32x32_32(
128-
out_data, inp1_data, inp2_data[0], alpha_val, out.numel());
146+
XT_KERNEL_CHECK(
147+
ctx,
148+
out,
149+
xa_nn_elm_add_scalar_32x32_32,
150+
out_data,
151+
inp1_data,
152+
inp2_data[0],
153+
alpha_val,
154+
out.numel());
129155
} else if (broadcast) {
130-
xa_nn_elm_add_broadcast_5D_32x32_32(
156+
XT_KERNEL_CHECK(
157+
ctx,
158+
out,
159+
xa_nn_elm_add_broadcast_5D_32x32_32,
131160
out_data,
132161
out_shape,
133162
inp1_data,
@@ -137,8 +166,15 @@ Tensor& add_out(
137166
max_dim,
138167
alpha_val);
139168
} else {
140-
xa_nn_elm_add_32x32_32(
141-
out_data, inp1_data, inp2_data, alpha_val, out.numel());
169+
XT_KERNEL_CHECK(
170+
ctx,
171+
out,
172+
xa_nn_elm_add_32x32_32,
173+
out_data,
174+
inp1_data,
175+
inp2_data,
176+
alpha_val,
177+
out.numel());
142178
}
143179
} else if ((compute_type == ScalarType::Float) && (optimized)) {
144180
const float* const inp1_data = a.const_data_ptr<float>();
@@ -149,13 +185,30 @@ Tensor& add_out(
149185
torch::executor::native::utils::extract_scalar(alpha, &alpha_val);
150186

151187
if ((a.numel() == 1) && (alpha_val == 1.0)) {
152-
xa_nn_elm_add_scalar_f32xf32_f32(
153-
out_data, inp2_data, inp1_data[0], alpha_val, out.numel());
188+
XT_KERNEL_CHECK(
189+
ctx,
190+
out,
191+
xa_nn_elm_add_scalar_f32xf32_f32,
192+
out_data,
193+
inp2_data,
194+
inp1_data[0],
195+
alpha_val,
196+
out.numel());
154197
} else if (b.numel() == 1) {
155-
xa_nn_elm_add_scalar_f32xf32_f32(
156-
out_data, inp1_data, inp2_data[0], alpha_val, out.numel());
198+
XT_KERNEL_CHECK(
199+
ctx,
200+
out,
201+
xa_nn_elm_add_scalar_f32xf32_f32,
202+
out_data,
203+
inp1_data,
204+
inp2_data[0],
205+
alpha_val,
206+
out.numel());
157207
} else if (broadcast) {
158-
xa_nn_elm_add_broadcast_5D_f32xf32_f32(
208+
XT_KERNEL_CHECK(
209+
ctx,
210+
out,
211+
xa_nn_elm_add_broadcast_5D_f32xf32_f32,
159212
out_data,
160213
out_shape,
161214
inp1_data,
@@ -165,8 +218,15 @@ Tensor& add_out(
165218
max_dim,
166219
alpha_val);
167220
} else {
168-
xa_nn_elm_add_f32xf32_f32(
169-
out_data, inp1_data, inp2_data, alpha_val, out.numel());
221+
XT_KERNEL_CHECK(
222+
ctx,
223+
out,
224+
xa_nn_elm_add_f32xf32_f32,
225+
out_data,
226+
inp1_data,
227+
inp2_data,
228+
alpha_val,
229+
out.numel());
170230
}
171231
} else {
172232
ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
@@ -242,8 +302,15 @@ Tensor& add_scalar_out(
242302

243303
int* const out_data = out.mutable_data_ptr<int>();
244304

245-
xa_nn_elm_add_scalar_32x32_32(
246-
out_data, inp1_data, inp2_val, alpha_val, out.numel());
305+
XT_KERNEL_CHECK(
306+
ctx,
307+
out,
308+
xa_nn_elm_add_scalar_32x32_32,
309+
out_data,
310+
inp1_data,
311+
inp2_val,
312+
alpha_val,
313+
out.numel());
247314

248315
} else if (compute_type == ScalarType::Float) {
249316
const float* const inp1_data = a.const_data_ptr<float>();
@@ -255,8 +322,15 @@ Tensor& add_scalar_out(
255322

256323
float* const out_data = out.mutable_data_ptr<float>();
257324

258-
xa_nn_elm_add_scalar_f32xf32_f32(
259-
out_data, inp1_data, inp2_val, alpha_val, out.numel());
325+
XT_KERNEL_CHECK(
326+
ctx,
327+
out,
328+
xa_nn_elm_add_scalar_f32xf32_f32,
329+
out_data,
330+
inp1_data,
331+
inp2_val,
332+
alpha_val,
333+
out.numel());
260334

261335
} else {
262336
ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {

backends/cadence/fusion_g3/operators/op_cat.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,17 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#include <cstring>
10+
11+
#include <xa_nnlib_kernels_api.h>
12+
913
#include <executorch/kernels/portable/cpu/util/copy_ops_util.h>
1014
#include <executorch/runtime/kernel/kernel_includes.h>
11-
#include <xa_nnlib_kernels_api.h>
12-
#include <cstring>
1315

14-
using exec_aten::Scalar;
15-
using exec_aten::ScalarType;
16-
using exec_aten::Tensor;
17-
using torch::executor::Error;
18-
using torch::executor::KernelRuntimeContext;
16+
using ::executorch::aten::ScalarType;
17+
using ::executorch::aten::Tensor;
18+
using ::executorch::runtime::Error;
19+
using ::executorch::runtime::KernelRuntimeContext;
1920

2021
/* ScalarType in ExecuTorch does not have support for the data types below.
2122
* So, creating a placeholder for these data types. Once, ScalarTypes is
@@ -194,4 +195,4 @@ Tensor& cat_out(
194195
} // namespace native
195196
} // namespace G3
196197
} // namespace impl
197-
} // namespace cadence
198+
} // namespace cadence

backends/cadence/fusion_g3/operators/op_dequantize.cpp

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,20 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9-
#include <executorch/kernels/portable/cpu/util/reduce_util.h>
10-
#include <executorch/runtime/kernel/kernel_includes.h>
11-
#include <xa_nnlib_kernels_api.h>
129
#include <algorithm>
1310
#include <cinttypes>
1411
#include <cmath>
1512

16-
using exec_aten::Scalar;
17-
using exec_aten::ScalarType;
18-
using exec_aten::Tensor;
19-
using torch::executor::Error;
20-
using torch::executor::KernelRuntimeContext;
13+
#include <xa_nnlib_kernels_api.h>
14+
15+
#include <executorch/kernels/portable/cpu/util/reduce_util.h>
16+
#include <executorch/runtime/kernel/kernel_includes.h>
17+
18+
using ::executorch::aten::Scalar;
19+
using ::executorch::aten::ScalarType;
20+
using ::executorch::aten::Tensor;
21+
using ::executorch::runtime::Error;
22+
using ::executorch::runtime::KernelRuntimeContext;
2123

2224
template <typename T>
2325
using optional = exec_aten::optional<T>;
@@ -185,7 +187,7 @@ void dequantize_impl(
185187
if (axis == NULL) {
186188
// calculate the dequantized output, cast scale to float to match fbgemm
187189
// behavior
188-
#define ASYM_DEQUANTIZE_IMPL_TESNOR(IN_CTYPE, OUT_CTYPE, out_dtype) \
190+
#define ASYM_DEQUANTIZE_IMPL_TENSOR(IN_CTYPE, OUT_CTYPE, out_dtype) \
189191
case ScalarType::out_dtype: { \
190192
/* Hoist these function calls out of our inner loop because they might not \
191193
* get inlined without LTO, particularly in ATen mode. */ \
@@ -201,7 +203,7 @@ void dequantize_impl(
201203
#define ASYM_CALCULATE_INT_TYPE_TENSOR(IN_CTYPE, in_dtype) \
202204
case ScalarType::in_dtype: \
203205
switch (out.scalar_type()) { \
204-
ET_FORALL_FLOAT_TYPES_WITH(IN_CTYPE, ASYM_DEQUANTIZE_IMPL_TESNOR); \
206+
ET_FORALL_FLOAT_TYPES_WITH(IN_CTYPE, ASYM_DEQUANTIZE_IMPL_TENSOR); \
205207
default: \
206208
ET_CHECK_MSG( \
207209
false, \
@@ -219,7 +221,7 @@ void dequantize_impl(
219221
static_cast<int8_t>(input.scalar_type()));
220222
}
221223
#undef ASYM_CALCULATE_INT_TYPE_TENSOR
222-
#undef ASYM_DEQUANTIZE_IMPL_TESNOR
224+
#undef ASYM_DEQUANTIZE_IMPL_TENSOR
223225
} else {
224226
// a list contains all dimensions except axis
225227
int64_t dims[input.dim() - 1];

backends/cadence/fusion_g3/operators/op_mul.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,19 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#include <xa_nnlib_kernels_api.h>
10+
911
#include <executorch/kernels/portable/cpu/scalar_utils.h>
1012
#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
1113
#include <executorch/runtime/kernel/kernel_includes.h>
1214
#include <executorch/runtime/platform/assert.h>
13-
#include <xa_nnlib_kernels_api.h>
1415

15-
using exec_aten::Scalar;
16-
using exec_aten::ScalarType;
17-
using exec_aten::Tensor;
18-
using executorch::runtime::canCast;
19-
using torch::executor::Error;
20-
using torch::executor::KernelRuntimeContext;
16+
using ::executorch::aten::Scalar;
17+
using ::executorch::aten::ScalarType;
18+
using ::executorch::aten::Tensor;
19+
using ::executorch::runtime::canCast;
20+
using ::executorch::runtime::Error;
21+
using ::executorch::runtime::KernelRuntimeContext;
2122

2223
namespace cadence {
2324
namespace impl {
@@ -238,4 +239,4 @@ Tensor& mul_scalar_out(
238239
} // namespace native
239240
} // namespace G3
240241
} // namespace impl
241-
} // namespace cadence
242+
} // namespace cadence

backends/cadence/fusion_g3/operators/op_native_layer_norm.cpp

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,20 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#include <cmath>
10+
#include <tuple>
11+
12+
#include <xa_nnlib_kernels_api.h>
13+
914
#include <executorch/kernels/portable/cpu/util/normalization_ops_util.h>
1015
#include <executorch/kernels/portable/cpu/vec_ops.h>
1116
#include <executorch/runtime/kernel/kernel_includes.h>
12-
#include <xa_nnlib_kernels_api.h>
13-
#include <cmath>
14-
#include <tuple>
1517

16-
using Tensor = exec_aten::Tensor;
17-
using ScalarType = exec_aten::ScalarType;
18-
using IntArrayRef = exec_aten::ArrayRef<int64_t>;
19-
using torch::executor::Error;
20-
using torch::executor::KernelRuntimeContext;
18+
using ::executorch::aten::IntArrayRef;
19+
using ::executorch::aten::ScalarType;
20+
using ::executorch::aten::Tensor;
21+
using ::executorch::runtime::Error;
22+
using ::executorch::runtime::KernelRuntimeContext;
2123

2224
namespace cadence {
2325
namespace impl {
@@ -255,4 +257,4 @@ std::tuple<Tensor&, Tensor&, Tensor&> native_layer_norm_out(
255257
} // namespace native
256258
} // namespace G3
257259
} // namespace impl
258-
} // namespace cadence
260+
} // namespace cadence

0 commit comments

Comments
 (0)