
Commit 4f40c44

Merge branch 'main' into matmul_unmark_flaky
2 parents 8eb6f7b + 9aedbeb

23 files changed: +203 additions, -146 deletions

.github/workflows/pull.yml
Lines changed: 0 additions & 28 deletions

@@ -343,34 +343,6 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
         PYTHON_EXECUTABLE=python bash examples/xnnpack/quantization/test_quantize.sh "${BUILD_TOOL}" mv2
 
-  test-pybind-build-linux:
-    name: test-pybind-build-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        # build module for executorch.extension.pybindings.portable_lib
-        BUILD_TOOL="cmake"
-        PYTHON_EXECUTABLE=python \
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON" \
-        bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-
-        # see if we can import the module successfully
-        python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
-
   test-binary-size-linux-gcc:
     name: test-binary-size-linux-gcc
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

.github/workflows/trunk.yml
Lines changed: 0 additions & 24 deletions

@@ -289,30 +289,6 @@ jobs:
         # Build and test coreml delegate
         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh
 
-  test-pybind-build-macos:
-    name: test-pybind-build-macos
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    strategy:
-      matrix:
-        include:
-          - build-tool: cmake
-      fail-fast: false
-    with:
-      runner: macos-m1-stable
-      python-version: '3.11'
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 180
-      script: |
-        bash .ci/scripts/setup-conda.sh
-
-        # build module for executorch.extension.pybindings.portable_lib
-        BUILD_TOOL=${{ matrix.build-tool }}
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON" PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
-
-        # see if we can import the module successfully
-        ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
-
   test-static-llama-ane:
     name: test-static-llama-ane
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main

backends/arm/scripts/build_executor_runner.sh
Lines changed: 1 addition & 1 deletion

@@ -103,7 +103,7 @@ then
     memory_mode="Shared_Sram"
     if [[ ${target} =~ "ethos-u85" ]]
     then
-        memory_mode="Dedicated_Sram_384KB"
+        memory_mode="Sram_Only"
     fi
 fi
 

backends/arm/test/test_arm_baremetal.sh
Lines changed: 4 additions & 1 deletion

@@ -210,7 +210,10 @@ test_models_ethos-u85() { # End to End model tests using model_test.py
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-512 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00"
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=lstm --extra_flags="-DET_ATOL=0.03 -DET_RTOL=0.03"
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=w2l --extra_flags="-DET_ATOL=0.01 -DET_RTOL=0.01"
-    python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=ic4 --extra_flags="-DET_ATOL=0.8 -DET_RTOL=0.8" --timeout=2400
+    # Temporarily do not test inception_v4 on Ethos-U85. To support inception_v4 properly on Ethos-U85, we need to run the model in Dedicated_Sram memory mode with
+    # 384KB (or another amount below 2MB) of SRAM passed as the fast scratch area. The PR adding support for Dedicated_Sram (https://github.com/pytorch/executorch/pull/10714)
+    # was reverted due to a change required in an internal variant of examples/arm/executor_runner/arm_executor_runner.cpp.
+    # python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=ic4 --extra_flags="-DET_ATOL=0.8 -DET_RTOL=0.8" --timeout=2400
 
     echo "${TEST_SUITE_NAME}: PASS"
 }

backends/arm/test/test_model.py
Lines changed: 1 addition & 1 deletion

@@ -81,7 +81,7 @@ def get_args():
     if "u55" in args.target:
         args.memory_mode = "Shared_Sram"
     elif "u85" in args.target:
-        args.memory_mode = "Dedicated_Sram_384KB"
+        args.memory_mode = "Sram_Only"
     else:
         raise RuntimeError(f"Invalid target name {args.target}")
 

backends/cadence/aot/replace_ops.py
Lines changed: 0 additions & 26 deletions

@@ -283,31 +283,6 @@ def call_operator(self, op, args, kwargs, meta):
         return super().call_operator(op, args, kwargs, meta)
 
 
-@register_cadence_pass(CadencePassAttribute(opt_level=0))
-class ReplaceTCopyWithTransposePass(ExportPass):
-    """
-    Replace t_copy with transpose_copy.int. If the input is 1D, the t_copy is
-    a nop. t_copy is not supported, so this is an opt_level=0 pass.
-    """
-
-    def call_operator(self, op, args, kwargs, meta):
-        if get_edge_overload_packet(op) != exir_ops.edge.aten.t_copy:
-            return super().call_operator(op, args, kwargs, meta)
-
-        # Get the input tensor shape
-        in_tensor = args[0].to_tensor() if isinstance(args[0], ProxyValue) else args[0]
-
-        # If the input is a 1D tensor, this t_copy is a nop, so return the input
-        if in_tensor.dim() <= 1:
-            return args[0]
-
-        assert in_tensor.dim() == 2, "t_copy expects a tensor with <= 2 dimensions"
-        transpose_args = (args[0], 0, 1)
-        return super().call_operator(
-            exir_ops.edge.aten.transpose_copy.int, transpose_args, kwargs, meta
-        )
-
-
 @register_cadence_pass(CadencePassAttribute(opt_level=0))
 class ReplaceMMWithAddMMPass(ExportPass):
     """
@@ -2407,7 +2382,6 @@ class CadenceReplaceOpsInGraph:
     passes = [
         ReplaceEmptyTensorsWithFullPass,
         ReplaceFunctionallyEquivalentOpTargets,
-        ReplaceTCopyWithTransposePass,
         ReplacePermuteWithTransposePass,
         ReplaceScalarWithTensorArgPass,
         ReplaceConvolutionOptionalArgsWithConcreteArgsPass,

backends/cadence/aot/tests/test_fusion_ops_passes.py
Lines changed: 24 additions & 25 deletions

@@ -295,11 +295,12 @@ def test_no_replace_quant_permute_dequant_with_requantize(self):
             args=(permute, 4.5, 6, 0, 127, torch.int8),
         )
         builder.output(dequant)
-        graph_module = FuseQuantDequantToRequantizePass(
+        original_graph = builder.get_graph_module()
+        converted_graph = FuseQuantDequantToRequantizePass(
             force_quant_dequant_fusion=False
-        )(builder.get_graph_module()).graph_module
+        )(original_graph).graph_module
         self.check_op_counts(
-            graph_module,
+            converted_graph,
             expected_op_counts={
                 # Verify that no dequant/quant pair was replaced with requantize.
                 # quantize -> permute -> dequantize should not be replaced with requantize.
@@ -310,30 +311,28 @@ def test_no_replace_quant_permute_dequant_with_requantize(self):
         )
 
     def test_replace_quant_view_dequant_with_requantize(self):
-        class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
-            def forward(self, x):
-                x = torch.ops.quantized_decomposed.quantize_per_tensor(
-                    x, 1.2, 3, 0, 127, torch.int8
-                )
-                x = x.view(-1)
-                x = torch.ops.quantized_decomposed.dequantize_per_tensor(
-                    x, 4.5, 6, 0, 127, torch.int8
-                )
-                return x
-
-        inputs = torch.randn(2, 12, 1, 6)
-        model = M()
-        graph_module = export_to_edge(model, (inputs,)).exported_program().graph_module
-        graph_module = FuseQuantDequantToRequantizePass()(graph_module).graph_module
-
+        builder = GraphBuilder()
+        x = builder.placeholder("x", torch.randn(2, 12, 1, 6, dtype=torch.float32))
+        quant = builder.call_operator(
+            op=exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+            args=(x, 1.2, 3, 0, 127, torch.int8),
+        )
+        view = builder.call_operator(
+            op=exir_ops.edge.aten.view_copy.default, args=(quant, [-1])
+        )
+        dequant = builder.call_operator(
+            op=exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
+            args=(view, 4.5, 6, 0, 127, torch.int8),
+        )
+        builder.output(dequant)
+        original_graph = builder.get_graph_module()
+        converted_graph = FuseQuantDequantToRequantizePass()(
+            original_graph
+        ).graph_module
         self.check_op_counts(
-            graph_module,
+            converted_graph,
             expected_op_counts={
-                # Verify that no dequant/quant pair was replaced with requantize.
-                # quantize -> permute -> dequantize should not be replaced with requantize.
+                # Verify that dequant/quant pair was replaced with requantize.
                 exir_ops.edge.quantized_decomposed.quantize_per_tensor.default: 0,
                 exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default: 0,
                 exir_ops.edge.cadence.requantize.default: 1,
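
For context, check_op_counts comes from the test base class and is not shown in this diff; it walks the graph and compares per-operator node counts against the expected_op_counts map. A minimal sketch of that kind of counting, using only standard torch.fx traversal (the helper name count_ops here is hypothetical, not the repo's implementation):

    import torch.fx

    def count_ops(graph_module: torch.fx.GraphModule, target) -> int:
        # Count call_function nodes whose target is the given edge op.
        return sum(
            1
            for node in graph_module.graph.nodes
            if node.op == "call_function" and node.target == target
        )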

backends/cadence/aot/tests/test_replace_ops_passes.py
Lines changed: 0 additions & 32 deletions

@@ -48,7 +48,6 @@
     ReplaceSingleElementTensorArgumentsFromFullOpWithScalarPass,
     ReplaceSplitWithSlicePass,
     ReplaceSqueezeAndUnsqueezeWithViewPass,
-    ReplaceTCopyWithTransposePass,
     ReplaceTransposedConvWithLinearPass,
     ReplaceTrivialConvWithLinear,
     ReplaceWhereWithFullArgsWithWhereScalar,
@@ -368,37 +367,6 @@ def forward(self, x: torch.Tensor):
             0,
         )
 
-    @parameterized.expand(
-        [
-            [(16, 32)],
-            [(1, 240)],
-            [(4, 16)],
-        ]
-    )
-    @torch.no_grad()
-    def test_replace_t_copy_with_transpose(self, shape: Tuple[int]):
-        class TCopy(torch.nn.Module):
-            def forward(self, x: torch.Tensor):
-                return exir_ops.edge.aten.t_copy(x)
-
-        w = torch.randn(shape)
-        inputs = (w,)
-        p1 = ReplaceTCopyWithTransposePass()
-        p2 = ReplacePermuteWithTransposePass()
-        model = TCopy()
-        graph_module = export_to_edge(model, inputs).exported_program().graph_module
-        graph_after_passes = cast(
-            PassResult, p2(cast(PassResult, p1(graph_module)).graph_module)
-        ).graph_module
-        self.assertEqual(
-            count_node(graph_after_passes, exir_ops.edge.aten.transpose_copy.int),
-            1,
-        )
-        self.assertEqual(
-            count_node(graph_after_passes, exir_ops.edge.aten.t_copy),
-            0,
-        )
-
     @parameterized.expand(
         [
             [(1, 8, 33), 8, 16, 3],

backends/xnnpack/operators/__init__.py
Lines changed: 1 addition & 0 deletions

@@ -24,6 +24,7 @@
     op_hardtanh,
     op_leaky_relu,
     op_linear,
+    op_log,
     op_matrix_multiplication,
     op_max_dim,
     op_max_pool2d,

backends/xnnpack/operators/op_log.py
Lines changed: 52 additions & 0 deletions

@@ -0,0 +1,52 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Dict
+
+import torch
+from executorch.backends.xnnpack.operators.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.xnnpack.serialization.xnnpack_graph_schema import (
+    XNNGraph,
+    XNNLog,
+    XNode,
+)
+from executorch.backends.xnnpack.utils.utils import get_input_node
+
+
+@register_node_visitor
+class LogVisitor(NodeVisitor):
+    target = "aten.log.default"
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        xnn_graph: XNNGraph,
+        vals_to_ids: Dict[torch.fx.Node, int],
+        debug_handle: int,
+    ) -> None:
+        self.define_nodes_tensor_inputs_outputs(node, xnn_graph, vals_to_ids)
+
+        # input
+        input_id = vals_to_ids[get_input_node(node, 0)]
+
+        # output
+        output_id = vals_to_ids[node]
+
+        ser_node = XNode(
+            xnode_union=XNNLog(
+                input_id=input_id,
+                output_id=output_id,
+                flags=0,
+            ),
+            debug_handle=debug_handle,
+        )
+        xnn_graph.xnodes.append(ser_node)
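
Taken together with the partitioner changes below, this visitor runs whenever a delegated graph contains aten.log.default. A minimal sketch of the standard ExecuTorch export flow that would reach it — not part of this commit, and assuming the usual top-level export APIs:

    import torch
    from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
    from executorch.exir import to_edge_transform_and_lower

    class LogModule(torch.nn.Module):
        def forward(self, x):
            return torch.log(x)

    # Export, then delegate the graph to XNNPACK; LogVisitor serializes the
    # log node as XNNLog, and the runtime defines it via xnn_define_log.
    exported = torch.export.export(LogModule(), (torch.rand(2, 3) + 0.1,))
    program = to_edge_transform_and_lower(
        exported, partitioner=[XnnpackPartitioner()]
    ).to_executorch()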

backends/xnnpack/partition/config/__init__.py
Lines changed: 2 additions & 0 deletions

@@ -30,6 +30,7 @@
     # EluConfig,
     HardtanhConfig,
     LeakyReLUConfig,
+    LogConfig,
     MaximumConfig,
     MaxPool2dConfig,
     MeanDimConfig,
@@ -82,6 +83,7 @@
     HardswishConfig,
     LeakyReLUConfig,
     LinearConfig,
+    LogConfig,
     MaxDimConfig,
     MaximumConfig,
     MaxPool2dConfig,

backends/xnnpack/partition/config/generic_node_configs.py
Lines changed: 7 additions & 0 deletions

@@ -357,6 +357,13 @@ def supported_precision_types(self) -> List[ConfigPrecisionType]:
         return [ConfigPrecisionType.FP32]
 
 
+class LogConfig(GenericNodePartitionerConfig):
+    target_name = "log.default"
+
+    def supported_precision_types(self) -> List[ConfigPrecisionType]:
+        return [ConfigPrecisionType.FP32]
+
+
 class MeanDimConfig(GenericNodePartitionerConfig):
     target_name = "mean.dim"
 

backends/xnnpack/partition/configs.py
Lines changed: 1 addition & 0 deletions

@@ -64,6 +64,7 @@
     exir_ops.edge.aten.leaky_relu.default,
     exir_ops.edge.aten.addmm.default,  # TODO(T163877189) add constraint for addmm
     exir_ops.edge.aten.rsqrt.default,
+    exir_ops.edge.aten.log.default,
 ]
 
 SUPPORTED_MODULES = [
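
A quick sanity check that the op now sits in the legacy partitioner's allow-list — a hypothetical snippet, with names taken from this diff and the exir_ops import used elsewhere in the file:

    from executorch.exir.dialects._ops import ops as exir_ops
    from executorch.backends.xnnpack.partition.configs import SUPPORTED_OPS

    # aten.log should now be accepted by the XNNPACK partitioner.
    assert exir_ops.edge.aten.log.default in SUPPORTED_OPS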

backends/xnnpack/runtime/XNNCompiler.cpp
Lines changed: 31 additions & 0 deletions

@@ -1418,6 +1418,36 @@ Error defineReciprocalSquareRootNode(
   return Error::Ok;
 }
 
+/*
+Define serialized log node into the subgraph, using the remapped ids
+to map the serialized ids, to the new ids generated when defining the
+tensor value
+*/
+Error defineLogNode(
+    xnn_subgraph_t subgraph_ptr,
+    const std::unordered_map<uint32_t, uint32_t>& remapped_ids,
+    const NodePtr node,
+    const fb_xnnpack::XNNGraph* graph) noexcept {
+  MAYBE_UNUSED(graph);
+
+  auto graph_node = node->xnode_union_as_XNNLog();
+
+  xnn_status status = xnn_define_log(
+      subgraph_ptr,
+      remapped_ids.at(graph_node->input_id()),
+      remapped_ids.at(graph_node->output_id()),
+      graph_node->flags());
+
+  ET_CHECK_OR_RETURN_ERROR(
+      status == xnn_status_success,
+      Internal,
+      "Failed to create log node %i with code: %s",
+      node->debug_handle(),
+      xnn_status_to_string(status));
+
+  return Error::Ok;
+}
+
 /*
 Define serialized ceiling node into the subgraph, using the remapped ids
 to map the serialized ids, to the new ids generated when defining the
@@ -1981,6 +2011,7 @@ DefineNodeFunc getDefineNodeFunc(fb_xnnpack::XNodeUnion nodeType) {
     _DEFINE(Ceiling)
     _DEFINE(Hardswish)
    _DEFINE(LeakyReLU)
+    _DEFINE(Log)
     _DEFINE(Maximum)
     _DEFINE(Negate)
     _DEFINE(Square)
