Skip to content

Commit 662cb81

Browse files
authored
Merge branch 'pytorch:main' into add-profiling-to-xnn-executor-runner-2
2 parents d9f3269 + 7bc06d1 commit 662cb81

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+996
-700
lines changed

backends/arm/operator_support/to_copy_support.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ def is_node_supported(self, node: fx.Node, tosa_spec: TosaSpecification) -> bool
125125
# Check dim_order (to_dim_order_copy)
126126
if "dim_order" in node.kwargs:
127127
dim_order = node.kwargs["dim_order"]
128+
# pyre-ignore[6]
128129
if dim_order != list(range(len(dim_order))):
129130
logger.info(
130131
f"Argument {dim_order=} is not supported for "

backends/cadence/aot/compiler.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
ExecutorchProgramManager,
3434
to_edge,
3535
)
36+
from executorch.exir.dialects._ops import ops as exir_ops
3637
from executorch.exir.pass_base import PassResult
3738
from executorch.exir.passes import ToOutVarPass
3839
from executorch.exir.passes.sym_shape_eval_pass import HintBasedSymShapeEvalPass
@@ -186,14 +187,17 @@ def export_to_edge(
186187
edge_prog_manager = to_edge(
187188
expo_program,
188189
compile_config=EdgeCompileConfig(
189-
_skip_dim_order=True,
190190
# Allow specific non-core aten ops in the IR.
191191
_core_aten_ops_exception_list=[
192192
torch.ops.aten._native_batch_norm_legit_functional.default,
193193
torch.ops.aten.linear.default,
194194
torch.ops.aten.linalg_vector_norm.default,
195195
torch.ops.aten.unfold.default,
196196
torch.ops.aten.angle.default,
197+
# cadence replaced to_dim_order_copy with _to_copy for performance
198+
# skip _to_copy op to get around of dim order check
199+
# We should remove this op once cadence can support dim order
200+
exir_ops.edge.aten._to_copy.default,
197201
],
198202
),
199203
constant_methods=constant_methods,

backends/cadence/aot/replace_ops.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
# pyre-unsafe
1313

14+
import copy
1415
import math
1516
from operator import neg
1617
from typing import cast, Dict, Iterable, Sequence, Set, Tuple
@@ -35,7 +36,12 @@
3536
from executorch.backends.cadence.aot.utils import get_edge_overload_packet
3637
from executorch.exir.dialects._ops import ops as exir_ops
3738
from executorch.exir.dialects.edge._ops import EdgeOpOverload, EdgeOpOverloadPacket
39+
from executorch.exir.dim_order_utils import get_memory_format
3840
from executorch.exir.pass_base import ExportPass, NodeMetadata, PassResult, ProxyValue
41+
from executorch.exir.passes.dim_order_ops_registry import (
42+
DimOrderOpsMap,
43+
MemoryFormatOpsMap,
44+
)
3945
from torch._subclasses import FakeTensor
4046
from torch.fx.node import Argument
4147

@@ -1799,6 +1805,72 @@ def call_operator(
17991805
)
18001806

18011807

1808+
@register_cadence_pass(CadencePassAttribute(opt_level=0))
1809+
class ReplaceToDimOrderCopyWithToCopyPass(ExportPass):
1810+
"""
1811+
dim_order_ops::to_dim_order_copy is not supported, so this is an opt_level=0 pass.
1812+
If the dim order is sequential, we don't need the extra work with strides and
1813+
can just use to_copy.
1814+
"""
1815+
1816+
def call_operator(
1817+
self,
1818+
op,
1819+
args: Tuple[Argument, ...],
1820+
kwargs: Dict[str, Argument],
1821+
meta: NodeMetadata,
1822+
) -> ProxyValue:
1823+
if op not in DimOrderOpsMap:
1824+
return super().call_operator(op, args, kwargs, meta)
1825+
1826+
# new kwargs with dim_order, and no memory_format for the new op
1827+
nkwargs = dict(copy.deepcopy(kwargs)) # orig kwargs are immutable
1828+
1829+
ndim = None
1830+
1831+
# can always get the shape, assuming rank is specialized
1832+
1833+
# pyre-ignore[16]: `None` has no attribute `to_tensor`
1834+
if isinstance(args[0], ProxyValue) and args[0].is_tensor():
1835+
# pyre-ignore[16]: `None` has no attribute `to_tensor`
1836+
ndim = args[0].to_tensor().dim()
1837+
elif isinstance(args[0], torch.Tensor):
1838+
# pyre-ignore[16]: `None` has no attribute `dim`
1839+
ndim = args[0].dim()
1840+
elif isinstance(args[0], torch.fx.immutable_collections.immutable_list):
1841+
# pyre-ignore[6]: Incompatible parameter type
1842+
ndim = len(args[0])
1843+
else:
1844+
assert 0, f"Expecting a Tensor or a ProxyValue but got {type(args[0])}"
1845+
1846+
# get the "to" memory format for the EdgeOp
1847+
contiguous_dim_order = list(range(ndim))
1848+
dim_order = nkwargs.pop("dim_order", None)
1849+
1850+
# Cadence only supports contiguous memory format
1851+
assert (
1852+
dim_order is None
1853+
# pyre-ignore[6]: Incompatible parameter type
1854+
or len(dim_order) == 0
1855+
or dim_order == contiguous_dim_order
1856+
), "Expected dim order in congituous or prevserve memory format, but got {}".format(
1857+
dim_order
1858+
)
1859+
1860+
# bring back memory format
1861+
# pyre-ignore[6]: Incompatible parameter type
1862+
nkwargs["memory_format"] = get_memory_format(dim_order)
1863+
1864+
memory_format_op = MemoryFormatOpsMap[op]
1865+
1866+
return super().call_operator(
1867+
memory_format_op,
1868+
args,
1869+
nkwargs,
1870+
meta,
1871+
)
1872+
1873+
18021874
@register_cadence_pass(CadencePassAttribute(opt_level=0))
18031875
class ReplaceFullLikeWithFullPass(ExportPass):
18041876
"""
@@ -2108,4 +2180,5 @@ class CadenceReplaceOpsInGraph:
21082180
ReplaceSingleElementTensorArgumentsFromFullOpWithScalarPass,
21092181
ReplaceAtenAvgPoolWithJarvisAvgPoolPass,
21102182
ReplaceAtenLinalgVectorNormWithCadenceLinalgVectorNormPass,
2183+
ReplaceToDimOrderCopyWithToCopyPass,
21112184
]

backends/cadence/fusion_g3/operators/op_exp.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ Tensor& exp_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
4949
out);
5050
#endif
5151

52-
if (out.scalar_type() == ScalarType::Float) {
53-
float* const out_data = out.mutable_data_ptr<float>();
54-
const float* const in_data = in.const_data_ptr<float>();
52+
if (in.scalar_type() == ScalarType::Float) {
53+
float* __restrict__ out_data = out.mutable_data_ptr<float>();
54+
const float* __restrict__ in_data = in.const_data_ptr<float>();
5555

5656
XT_KERNEL_CHECK(
5757
ctx, out, xa_nn_elm_exp_f32_f32, out_data, in_data, out.numel());
@@ -66,4 +66,4 @@ Tensor& exp_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
6666
} // namespace native
6767
} // namespace G3
6868
} // namespace impl
69-
} // namespace cadence
69+
} // namespace cadence

backends/cadence/hifi/kernels/targets.bzl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,17 @@ load("@fbsource//tools/build_defs:platform_defs.bzl", "CXX")
22
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
33

44
def define_common_targets():
5+
common_deps = [
6+
"//executorch/runtime/kernel:kernel_includes",
7+
]
8+
59
runtime.cxx_library(
610
name = "kernels",
711
srcs = ["kernels.cpp"],
812
exported_headers = [
913
"kernels.h",
1014
],
15+
deps = common_deps,
1116
visibility = [
1217
"//executorch/backends/cadence/...",
1318
],

backends/cadence/hifi/operators/op_where.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ namespace impl {
2828
namespace HiFi {
2929
namespace native {
3030

31-
Tensor& where_out(
31+
Tensor& where_self_out(
3232
RuntimeContext& ctx,
3333
const Tensor& cond,
3434
const Tensor& a,

backends/cadence/hifi/third-party/nnlib/xa_nn_elm_clamp_f32_broadcast.c

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,11 @@
1919
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2020
2121
******************************************************************************/
22-
#include "nnlib-hifi4/xa_nnlib/include/xa_type_def.h"
23-
#include "nnlib-hifi4/xa_nnlib/algo/common/include/xa_nnlib_common_fpu.h"
24-
#include "nnlib-hifi4/xa_nnlib/algo/common/include/xa_nn_common.h"
25-
#include "nnlib-hifi4/xa_nnlib/algo/common/include/xa_nnlib_err_chk.h"
26-
#include "nnlib-hifi4/xa_nnlib/algo/kernels/basic/hifi4/xa_nn_basic_state.h"
27-
#include "nnlib-hifi4/xa_nnlib/include/nnlib/xa_nnlib_kernels_api.h"
22+
#include "xa_type_def.h"
23+
#include "xa_nnlib_common_fpu.h"
24+
#include "xa_nn_common.h"
25+
#include "xa_nnlib_err_chk.h"
26+
#include "xa_nnlib_kernels_api.h"
2827

2928

3029
#if !HAVE_VFPU

0 commit comments

Comments
 (0)