Skip to content

Commit 6da9617

Browse files
zonglinpeng authored and facebook-github-bot committed
port add sub mul div tanh sigmoid from oss, create new 3p buck targets, add kernel modification (#6601)
Summary: This commit does the three things named in the title: (1) creates Buck targets for add, mul, sub, div, sigmoid and tanh; (2) creates new third-party Buck targets for internal use: the OSS version is unique and leads to the GH version, and by buckifying the “staging” targets it’s much faster for us to get to the latest kernels; (3) modifies the Cadence kernels to use the XT_ APIs. Reviewed By: hsharma35. Differential Revision: D65300260
1 parent f7e26d7 commit 6da9617

File tree

7 files changed

+176
-37
lines changed

7 files changed

+176
-37
lines changed

backends/cadence/aot/compiler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,10 @@ def export_to_edge(
207207
def export_to_cadence_edge_executorch(
208208
model: torch.nn.Module,
209209
inputs: tuple[object, ...],
210-
dump_graphs: bool = False,
211210
output_dir: Optional[str] = None,
211+
dump_graphs: bool = False,
212212
) -> ExecutorchProgramManager:
213-
edge_prog_manager = export_to_edge(model, inputs)
213+
edge_prog_manager = export_to_edge(model, inputs, dump_graphs)
214214

215215
# Run a couple required passes for quant/dequant ops
216216
cadence_prog_manager = edge_prog_manager.transform(

backends/cadence/hifi/operators/targets.bzl

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,122 @@ def define_common_targets():
3131
"@EXECUTORCH_CLIENTS",
3232
],
3333
)
34+
35+
runtime.cxx_library(
36+
name = "op_add",
37+
srcs = glob([
38+
"op_add.cpp",
39+
]),
40+
platforms = CXX,
41+
deps = [
42+
"//executorch/kernels/portable/cpu/util:all_deps",
43+
"//executorch/kernels/portable/cpu/pattern:all_deps",
44+
"//executorch/runtime/kernel:kernel_includes",
45+
"//executorch/kernels/portable/cpu:scalar_utils",
46+
"//executorch/backends/cadence/hifi/kernels:kernels",
47+
"//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions"
48+
],
49+
visibility = [
50+
"//executorch/backends/cadence/...",
51+
"@EXECUTORCH_CLIENTS",
52+
],
53+
)
54+
55+
56+
runtime.cxx_library(
57+
name = "op_mul",
58+
srcs = glob([
59+
"op_mul.cpp",
60+
]),
61+
platforms = CXX,
62+
deps = [
63+
"//executorch/kernels/portable/cpu/util:all_deps",
64+
"//executorch/kernels/portable/cpu/pattern:all_deps",
65+
"//executorch/runtime/kernel:kernel_includes",
66+
"//executorch/kernels/portable/cpu:scalar_utils",
67+
"//executorch/backends/cadence/hifi/kernels:kernels",
68+
"//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions"
69+
],
70+
visibility = [
71+
"//executorch/backends/cadence/...",
72+
"@EXECUTORCH_CLIENTS",
73+
],
74+
)
75+
76+
runtime.cxx_library(
77+
name = "op_sub",
78+
srcs = glob([
79+
"op_sub.cpp",
80+
]),
81+
platforms = CXX,
82+
deps = [
83+
"//executorch/kernels/portable/cpu/util:all_deps",
84+
"//executorch/kernels/portable/cpu/pattern:all_deps",
85+
"//executorch/runtime/kernel:kernel_includes",
86+
"//executorch/kernels/portable/cpu:scalar_utils",
87+
"//executorch/backends/cadence/hifi/kernels:kernels",
88+
"//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions"
89+
],
90+
visibility = [
91+
"//executorch/backends/cadence/...",
92+
"@EXECUTORCH_CLIENTS",
93+
],
94+
)
95+
96+
runtime.cxx_library(
97+
name = "op_div",
98+
srcs = glob([
99+
"op_div.cpp",
100+
]),
101+
platforms = CXX,
102+
deps = [
103+
"//executorch/kernels/portable/cpu/util:all_deps",
104+
"//executorch/kernels/portable/cpu/pattern:all_deps",
105+
"//executorch/runtime/kernel:kernel_includes",
106+
"//executorch/kernels/portable/cpu:scalar_utils",
107+
"//executorch/backends/cadence/hifi/kernels:kernels",
108+
"//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions"
109+
],
110+
visibility = [
111+
"//executorch/backends/cadence/...",
112+
"@EXECUTORCH_CLIENTS",
113+
],
114+
)
115+
116+
runtime.cxx_library(
117+
name = "op_sigmoid",
118+
srcs = glob([
119+
"op_sigmoid.cpp",
120+
]),
121+
platforms = CXX,
122+
deps = [
123+
"//executorch/kernels/portable/cpu/util:all_deps",
124+
"//executorch/kernels/portable/cpu/pattern:all_deps",
125+
"//executorch/runtime/kernel:kernel_includes",
126+
"//executorch/backends/cadence/hifi/kernels:kernels",
127+
"//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions"
128+
],
129+
visibility = [
130+
"//executorch/backends/cadence/...",
131+
"@EXECUTORCH_CLIENTS",
132+
],
133+
)
134+
135+
runtime.cxx_library(
136+
name = "op_tanh",
137+
srcs = glob([
138+
"op_tanh.cpp",
139+
]),
140+
platforms = CXX,
141+
deps = [
142+
"//executorch/kernels/portable/cpu/util:all_deps",
143+
"//executorch/kernels/portable/cpu/pattern:all_deps",
144+
"//executorch/runtime/kernel:kernel_includes",
145+
"//executorch/backends/cadence/hifi/kernels:kernels",
146+
"//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions"
147+
],
148+
visibility = [
149+
"//executorch/backends/cadence/...",
150+
"@EXECUTORCH_CLIENTS",
151+
],
152+
)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
load("targets.bzl", "define_common_targets")
2+
3+
oncall("odai_jarvis")
4+
5+
define_common_targets()
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2+
3+
load("@fbsource//tools/build_defs:platform_defs.bzl", "CXX")
4+
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
5+
6+
def define_common_targets():
7+
runtime.cxx_library(
8+
name = "nnlib-extensions",
9+
srcs = native.glob(["*.c", "*.cpp"]),
10+
exported_headers = glob(["*.h"]),
11+
visibility = [
12+
"//executorch/backends/cadence/...",
13+
"@EXECUTORCH_CLIENTS",
14+
],
15+
deps = [
16+
"fbsource//third-party/nnlib-hifi4/xa_nnlib:libxa_nnlib",
17+
],
18+
)

backends/cadence/hifi/third-party/nnlib/xa_nn_elm_add_f32_broadcast.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
#include "xa_nnlib_err_chk.h"
2626
#include "xa_nnlib_kernels_api.h"
2727

28-
2928
#if HAVE_VFPU
3029
static void internal_elm_add_broadcast_2D_f32xf32_f32(FLOAT32 * __restrict__ p_out,
3130
const FLOAT32 * __restrict__ p_inp1,
@@ -425,4 +424,3 @@ WORD32 xa_nn_elm_add_broadcast_4D_f32xf32_f32(FLOAT32 * __restrict__ p_out,
425424
return 0;
426425

427426
}
428-

backends/cadence/hifi/third-party/nnlib/xa_nn_elm_div_mode_f32_broadcast.c

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ WORD32 xa_nn_elm_div_mode_f32xf32_f32(FLOAT32 * __restrict__ p_out,
5454
XT_LASX2IP(x1, inp1_a, inp1);
5555
XT_LASX2IP(x2, inp2_a, inp2);
5656
y = XT_DIV_SX2(x1, x2);
57-
y = FITRUNC_SX2(y);
57+
y = XT_FITRUNC_SX2(y);
5858
XT_SASX2IP(y, out_a, out);
5959
}
6060
}
@@ -66,7 +66,7 @@ WORD32 xa_nn_elm_div_mode_f32xf32_f32(FLOAT32 * __restrict__ p_out,
6666
XT_LASX2IP(x1, inp1_a, inp1);
6767
XT_LASX2IP(x2, inp2_a, inp2);
6868
y = XT_DIV_SX2(x1, x2);
69-
y = FIFLOOR_SX2(y);
69+
y = XT_FIFLOOR_SX2(y);
7070
XT_SASX2IP(y, out_a, out);
7171
}
7272
}
@@ -80,9 +80,9 @@ WORD32 xa_nn_elm_div_mode_f32xf32_f32(FLOAT32 * __restrict__ p_out,
8080
XT_LSIP(a2, (xtfloat *)inp2, 0);
8181
a = XT_DIV_S(a1, a2);
8282
if(mode == 0)
83-
a = FITRUNC_S(a);
83+
a = XT_FITRUNC_S(a);
8484
else
85-
a = FIFLOOR_S(a);
85+
a = XT_FIFLOOR_S(a);
8686
XT_SSI(a, (xtfloat *)out, 0);
8787
}
8888

@@ -138,7 +138,7 @@ static void internal_elm_div_mode_broadcast_2D_f32xf32_f32(FLOAT32 * __restrict_
138138
XT_LSX2IP(x1, p_a, 2 * sizeof(FLOAT32));
139139
XT_LSX2IP(x2, p_b, 2 * sizeof(FLOAT32));
140140
y = XT_DIV_SX2(x2, x1);
141-
y = FITRUNC_SX2(y);
141+
y = XT_FITRUNC_SX2(y);
142142
XT_SSX2IP(y, p_c, 2 * sizeof(FLOAT32));
143143
}
144144
}
@@ -149,7 +149,7 @@ static void internal_elm_div_mode_broadcast_2D_f32xf32_f32(FLOAT32 * __restrict_
149149
XT_LSX2IP(x1, p_a, 2 * sizeof(FLOAT32));
150150
XT_LSX2IP(x2, p_b, 2 * sizeof(FLOAT32));
151151
y = XT_DIV_SX2(x2, x1);
152-
y = FIFLOOR_SX2(y);
152+
y = XT_FIFLOOR_SX2(y);
153153
XT_SSX2IP(y, p_c, 2 * sizeof(FLOAT32));
154154
}
155155
}
@@ -166,7 +166,7 @@ static void internal_elm_div_mode_broadcast_2D_f32xf32_f32(FLOAT32 * __restrict_
166166
XT_LASX2IP(x1, vinp1, p_a);
167167
XT_LASX2IP(x2, vinp2, p_b);
168168
y = XT_DIV_SX2(x2, x1);
169-
y = FITRUNC_SX2(y);
169+
y = XT_FITRUNC_SX2(y);
170170
XT_SASX2IP(y, out_a, p_c);
171171
}
172172
}
@@ -177,7 +177,7 @@ static void internal_elm_div_mode_broadcast_2D_f32xf32_f32(FLOAT32 * __restrict_
177177
XT_LASX2IP(x1, vinp1, p_a);
178178
XT_LASX2IP(x2, vinp2, p_b);
179179
y = XT_DIV_SX2(x2, x1);
180-
y = FIFLOOR_SX2(y);
180+
y = XT_FIFLOOR_SX2(y);
181181
XT_SASX2IP(y, out_a, p_c);
182182
}
183183
}
@@ -189,9 +189,9 @@ static void internal_elm_div_mode_broadcast_2D_f32xf32_f32(FLOAT32 * __restrict_
189189
XT_LSIP(b0, (xtfloat *)p_b, sizeof(FLOAT32));
190190
c0 = XT_DIV_S(b0, a0);
191191
if(mode == 0)
192-
c0 = FITRUNC_S(c0);
192+
c0 = XT_FITRUNC_S(c0);
193193
else
194-
c0 = FIFLOOR_S(c0);
194+
c0 = XT_FIFLOOR_S(c0);
195195
XT_SSI(c0, (xtfloat *)p_c, 0);
196196
}
197197
}
@@ -213,7 +213,7 @@ static void internal_elm_div_mode_broadcast_2D_f32xf32_f32(FLOAT32 * __restrict_
213213
XT_LSX2IP(x1, p_a, 2 * sizeof(FLOAT32));
214214
XT_LSX2IP(x2, p_b, 2 * sizeof(FLOAT32));
215215
y = XT_DIV_SX2(x1, x2);
216-
y = FITRUNC_SX2(y);
216+
y = XT_FITRUNC_SX2(y);
217217
XT_SSX2IP(y, p_c, 2 * sizeof(FLOAT32));
218218
}
219219
}
@@ -224,7 +224,7 @@ static void internal_elm_div_mode_broadcast_2D_f32xf32_f32(FLOAT32 * __restrict_
224224
XT_LSX2IP(x1, p_a, 2 * sizeof(FLOAT32));
225225
XT_LSX2IP(x2, p_b, 2 * sizeof(FLOAT32));
226226
y = XT_DIV_SX2(x1, x2);
227-
y = FIFLOOR_SX2(y);
227+
y = XT_FIFLOOR_SX2(y);
228228
XT_SSX2IP(y, p_c, 2 * sizeof(FLOAT32));
229229
}
230230
}
@@ -241,7 +241,7 @@ static void internal_elm_div_mode_broadcast_2D_f32xf32_f32(FLOAT32 * __restrict_
241241
XT_LASX2IP(x1, vinp1, p_a);
242242
XT_LASX2IP(x2, vinp2, p_b);
243243
y = XT_DIV_SX2(x1, x2);
244-
y = FITRUNC_SX2(y);
244+
y = XT_FITRUNC_SX2(y);
245245
XT_SASX2IP(y, out_a, p_c);
246246
}
247247
}
@@ -252,7 +252,7 @@ static void internal_elm_div_mode_broadcast_2D_f32xf32_f32(FLOAT32 * __restrict_
252252
XT_LASX2IP(x1, vinp1, p_a);
253253
XT_LASX2IP(x2, vinp2, p_b);
254254
y = XT_DIV_SX2(x1, x2);
255-
y = FIFLOOR_SX2(y);
255+
y = XT_FIFLOOR_SX2(y);
256256
XT_SASX2IP(y, out_a, p_c);
257257
}
258258
}
@@ -264,9 +264,9 @@ static void internal_elm_div_mode_broadcast_2D_f32xf32_f32(FLOAT32 * __restrict_
264264
XT_LSIP(b0, (xtfloat *)p_b, sizeof(FLOAT32));
265265
c0 = XT_DIV_S(a0, b0);
266266
if(mode == 0)
267-
c0 = FITRUNC_S(c0);
267+
c0 = XT_FITRUNC_S(c0);
268268
else
269-
c0 = FIFLOOR_S(c0);
269+
c0 = XT_FIFLOOR_S(c0);
270270
XT_SSI(c0, (xtfloat *)p_c, 0);
271271
}
272272
}
@@ -302,7 +302,7 @@ static void internal_elm_div_mode_broadcast_f32xf32_f32(FLOAT32 * __restrict__ p
302302
{
303303
XT_LSX2IP(x1, p_a, 2 * sizeof(FLOAT32));
304304
y = XT_DIV_SX2(x2, x1);
305-
y = FITRUNC_SX2(y);
305+
y = XT_FITRUNC_SX2(y);
306306
XT_SSX2IP(y, p_c, 2 * sizeof(FLOAT32));
307307
}
308308
}
@@ -312,7 +312,7 @@ static void internal_elm_div_mode_broadcast_f32xf32_f32(FLOAT32 * __restrict__ p
312312
{
313313
XT_LSX2IP(x1, p_a, 2 * sizeof(FLOAT32));
314314
y = XT_DIV_SX2(x2, x1);
315-
y = FIFLOOR_SX2(y);
315+
y = XT_FIFLOOR_SX2(y);
316316
XT_SSX2IP(y, p_c, 2 * sizeof(FLOAT32));
317317
}
318318
}
@@ -328,7 +328,7 @@ static void internal_elm_div_mode_broadcast_f32xf32_f32(FLOAT32 * __restrict__ p
328328
{
329329
XT_LASX2IP(x1, inp1_a, p_a);
330330
y = XT_DIV_SX2(x2, x1);
331-
y = FITRUNC_SX2(y);
331+
y = XT_FITRUNC_SX2(y);
332332
XT_SASX2IP(y, out_a, p_c);
333333
}
334334
}
@@ -338,7 +338,7 @@ static void internal_elm_div_mode_broadcast_f32xf32_f32(FLOAT32 * __restrict__ p
338338
{
339339
XT_LASX2IP(x1, inp1_a, p_a);
340340
y = XT_DIV_SX2(x2, x1);
341-
y = FIFLOOR_SX2(y);
341+
y = XT_FIFLOOR_SX2(y);
342342
XT_SASX2IP(y, out_a, p_c);
343343
}
344344
}
@@ -349,9 +349,9 @@ static void internal_elm_div_mode_broadcast_f32xf32_f32(FLOAT32 * __restrict__ p
349349
XT_LSIP(a0_7, (xtfloat *)p_a, sizeof(FLOAT32));
350350
out = XT_DIV_S(x2, a0_7);
351351
if(mode == 0)
352-
out = FITRUNC_S(out);
352+
out = XT_FITRUNC_S(out);
353353
else
354-
out = FIFLOOR_S(out);
354+
out = XT_FIFLOOR_S(out);
355355
XT_SSI(out, (xtfloat *)p_c, 0);
356356
}
357357
}
@@ -366,7 +366,7 @@ static void internal_elm_div_mode_broadcast_f32xf32_f32(FLOAT32 * __restrict__ p
366366
{
367367
XT_LSX2IP(x1, p_a, 2 * sizeof(FLOAT32));
368368
y = XT_DIV_SX2(x1, x2);
369-
y = FITRUNC_SX2(y);
369+
y = XT_FITRUNC_SX2(y);
370370
XT_SSX2IP(y, p_c, 2 * sizeof(FLOAT32));
371371
}
372372
}
@@ -376,7 +376,7 @@ static void internal_elm_div_mode_broadcast_f32xf32_f32(FLOAT32 * __restrict__ p
376376
{
377377
XT_LSX2IP(x1, p_a, 2 * sizeof(FLOAT32));
378378
y = XT_DIV_SX2(x1, x2);
379-
y = FIFLOOR_SX2(y);
379+
y = XT_FIFLOOR_SX2(y);
380380
XT_SSX2IP(y, p_c, 2 * sizeof(FLOAT32));
381381
}
382382
}
@@ -392,7 +392,7 @@ static void internal_elm_div_mode_broadcast_f32xf32_f32(FLOAT32 * __restrict__ p
392392
{
393393
XT_LASX2IP(x1, inp1_a, p_a);
394394
y = XT_DIV_SX2(x1, x2);
395-
y = FITRUNC_SX2(y);
395+
y = XT_FITRUNC_SX2(y);
396396
XT_SASX2IP(y, out_a, p_c);
397397
}
398398
}
@@ -402,7 +402,7 @@ static void internal_elm_div_mode_broadcast_f32xf32_f32(FLOAT32 * __restrict__ p
402402
{
403403
XT_LASX2IP(x1, inp1_a, p_a);
404404
y = XT_DIV_SX2(x1, x2);
405-
y = FIFLOOR_SX2(y);
405+
y = XT_FIFLOOR_SX2(y);
406406
XT_SASX2IP(y, out_a, p_c);
407407
}
408408
}
@@ -413,9 +413,9 @@ static void internal_elm_div_mode_broadcast_f32xf32_f32(FLOAT32 * __restrict__ p
413413
XT_LSIP(a0_7, (xtfloat *)p_a, sizeof(FLOAT32));
414414
out = XT_DIV_S(a0_7, x2);
415415
if(mode == 0)
416-
out = FITRUNC_S(out);
416+
out = XT_FITRUNC_S(out);
417417
else
418-
out = FIFLOOR_S(out);
418+
out = XT_FIFLOOR_S(out);
419419
XT_SSI(out, (xtfloat *)p_c, 0);
420420
}
421421
}

0 commit comments

Comments
 (0)