Commit 3375f85

committed
Update on "[Executorch][llm] Enable leveraging ring kv cache via module swap"

This allows us to make some of the attention modules use a sliding-window KV cache, which will help enable models like Gemma3.

Differential Revision: [D73891426](https://our.internmc.facebook.com/intern/diff/D73891426/)

[ghstack-poisoned]
2 parents e47dfd9 + eb677e5 commit 3375f85
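
As background for the headline change, a minimal sketch of the module-swap idea follows: walk the model and replace each attention layer's plain KV cache with a ring (sliding-window) cache. Everything here (RingKVCache, the KVCache marker class, the buffer layout) is an illustrative assumption, not the actual ExecuTorch API.

# Illustrative sketch only: RingKVCache, the KVCache marker class, and the
# (1, n_heads, length, head_dim) buffer layout are assumptions for this example.
import torch
import torch.nn as nn


class RingKVCache(nn.Module):
    """Fixed-size KV cache whose writes wrap around a sliding window."""

    def __init__(self, window_size: int, n_heads: int, head_dim: int):
        super().__init__()
        self.window_size = window_size
        self.register_buffer("k_cache", torch.zeros(1, n_heads, window_size, head_dim))
        self.register_buffer("v_cache", torch.zeros(1, n_heads, window_size, head_dim))

    def update(self, pos: torch.Tensor, k: torch.Tensor, v: torch.Tensor):
        # pos: 1-D tensor of token positions; writes wrap modulo the window,
        # so memory stays bounded regardless of sequence length.
        idx = pos % self.window_size
        self.k_cache[:, :, idx] = k
        self.v_cache[:, :, idx] = v
        return self.k_cache, self.v_cache


def swap_kv_caches(model: nn.Module, window_size: int) -> nn.Module:
    # Recursively replace each plain KV-cache submodule with a ring cache.
    for name, child in model.named_children():
        if type(child).__name__ == "KVCache":  # assumed marker class name
            n_heads, head_dim = child.k_cache.shape[1], child.k_cache.shape[3]
            setattr(model, name, RingKVCache(window_size, n_heads, head_dim))
        else:
            swap_kv_caches(child, window_size)
    return model

The appeal of swapping at the module level is that the rest of the model definition stays untouched; only the cache submodules change.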

File tree: 175 files changed, +2347 −760 lines

.github/workflows/_link_check.yml

Lines changed: 30 additions & 14 deletions

@@ -7,35 +7,51 @@ on:
 
 jobs:
   lint-urls:
+    if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'skip-url-lint') }}
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-linter
-      submodules: 'none'
+      submodules: false
       fetch-depth: 0
       ref: ${{ inputs.ref }}
-      timeout: 90
+      timeout: 120
       script: |
         ./scripts/lint_urls.sh $(
-          [ "${{ github.event_name }}" = "pull_request" ] \
-            && git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
-            || [ "${{ github.event_name }}" = "push" ] \
-            && git diff --name-only ${{ github.event.before }} ${{ github.sha }}
-        )
+          { [ "${{ github.event_name }}" = "pull_request" ] \
+            && git diff --name-only "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}"; } \
+          || \
+          { [ "${{ github.event_name }}" = "push" ] \
+            && git diff --name-only "${{ github.event.before }}...${{ github.sha }}"; }
+        ) || {
+          echo
+          echo "URL lint failed."
+          echo "If this is a transient outage, you can bypass it by adding the \`skip-url-lint\` label to your PR."
+          echo "Or add \`@lint-ignore\` somewhere on the same line as the URL you want to skip checking."
+          exit 1
+        }
 
   lint-xrefs:
+    if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'skip-xref-lint') }}
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-linter
-      submodules: 'none'
+      submodules: false
       fetch-depth: 0
       ref: ${{ inputs.ref }}
-      timeout: 90
+      timeout: 60
       script: |
         ./scripts/lint_xrefs.sh $(
-          [ "${{ github.event_name }}" = "pull_request" ] \
-            && git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
-            || [ "${{ github.event_name }}" = "push" ] \
-            && git diff --name-only ${{ github.event.before }} ${{ github.sha }}
-        )
+          { [ "${{ github.event_name }}" = "pull_request" ] \
+            && git diff --name-only "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}"; } \
+          || \
+          { [ "${{ github.event_name }}" = "push" ] \
+            && git diff --name-only "${{ github.event.before }}...${{ github.sha }}"; }
+        ) || {
+          echo
+          echo "Xref lint failed."
+          echo "If this is a transient outage, you can bypass it by adding the \`skip-xref-lint\` label to your PR."
+          echo "Or add \`@lint-ignore\` somewhere on the same line as the reference you want to skip checking."
+          exit 1
+        }

.github/workflows/build-presets.yml

Lines changed: 13 additions & 0 deletions

@@ -0,0 +1,13 @@
+name: Build Presets
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+      - release/*
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: true

CMakeLists.txt

Lines changed: 1 addition & 1 deletion

@@ -608,7 +608,7 @@ endif()
 # any backends.
 #
 add_library(executorch ${_executorch__srcs})
-target_link_libraries(executorch PUBLIC executorch_core)
+target_link_libraries(executorch PRIVATE executorch_core)
 target_include_directories(executorch PUBLIC ${_common_include_directories})
 target_compile_definitions(executorch PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)
 target_compile_options(executorch PUBLIC ${_common_compile_options})

backends/arm/_passes/TARGETS

Lines changed: 1 addition & 0 deletions

@@ -11,5 +11,6 @@ python_library(
         "//executorch/backends/xnnpack/_passes:xnnpack_passes",
         "//executorch/exir:lib",
         "//executorch/backends/transforms:utils",
+        "//executorch/backends/transforms:decompose_sdpa",
     ],
 )

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -57,4 +57,5 @@
 from .size_adjust_conv2d_pass import SizeAdjustConv2DPass  # noqa
 from .unsqueeze_before_repeat_pass import UnsqueezeBeforeRepeatPass  # noqa
 from .unsqueeze_scalar_placeholders_pass import UnsqueezeScalarPlaceholdersPass  # noqa
+from .replace_inf_values_pass import ReplaceInfValues  # noqa  # usort: skip
 from .arm_pass_manager import ArmPassManager  # noqa  # usort: skip

backends/arm/_passes/annotate_decomposed_matmul.py

Lines changed: 9 additions & 5 deletions

@@ -70,17 +70,14 @@ def call(self, graph_module: GraphModule) -> PassResult:
             if quantized_input:
                 matmul_args = matmul_node.all_input_nodes
                 for node in matmul_args:
+                    # Find the dq-node connected to this mm/bmm arg
                     input_node = self._match_partition_to_node(
                         node, partition.input_nodes
                     )
-
-                    # Remove partition input dq-node
-                    input_node.replace_all_uses_with(input_node.all_input_nodes[0])
-                    graph_module.graph.erase_node(input_node)
                     input_node_qargs = QuantArgs.from_operator(
                         input_node.target, input_node.args
                     )
-
+                    # Insert new dq-node just before the mm/bmm with input_node's qparams
                     with graph_module.graph.inserting_before(matmul_node):
                         # Create new dq-node before matmul
                         dq_node = create_node(
@@ -90,6 +87,13 @@ def call(self, graph_module: GraphModule) -> PassResult:
                         dq_node.args = (node, *input_node_qargs)
                         matmul_node.replace_input_with(node, dq_node)
 
+                for partition_input in partition.input_nodes:
+                    # Remove partition input dq-node
+                    partition_input.replace_all_uses_with(
+                        partition_input.all_input_nodes[0]
+                    )
+                    graph_module.graph.erase_node(partition_input)
+
             partition_output = list(partition.output_nodes[0].users)[0]
             quantized_output = partition_output.target == q_op
             if quantized_output:

backends/arm/_passes/arm_pass_manager.py

Lines changed: 2 additions & 0 deletions

@@ -49,6 +49,7 @@
     MatchWhereSelfDtypePass,
     QuantizeOperatorArguments,
     RemoveClonePass,
+    ReplaceInfValues,
     ReplaceScalarWithTensorArgPassTOSABI,
     ReplaceScalarWithTensorArgPassTOSAMI,
     RetraceFoldedDtypesPass,
@@ -216,4 +217,5 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(DecomposeSoftmaxPass())
 
         self.add_pass(ConvertMinMaxPass())
+        self.add_pass(ReplaceInfValues())
         return self._transform(graph_module)

backends/arm/_passes/convert_split_to_slice.py

Lines changed: 6 additions & 5 deletions

@@ -1,14 +1,15 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
-# All rights reserved.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
 # pyre-unsafe
 
 import torch.fx
-from executorch.backends.arm._passes.arm_pass_utils import create_node
-from executorch.backends.arm.tosa_mapping import extract_tensor_meta
+from executorch.backends.arm._passes.arm_pass_utils import (
+    create_node,
+    get_first_fake_tensor,
+)
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
 
@@ -34,7 +35,7 @@ def call(self, graph_module: torch.fx.GraphModule):
             split_node = node
             input_node = split_node.all_input_nodes[0]
             output_nodes = split_node.users.copy()
-            _, shape, _ = extract_tensor_meta(input_node.meta)
+            shape = get_first_fake_tensor(input_node).shape
             rank = len(shape)
             split_lengths = split_node.args[1]
             dim = split_node.args[2] if len(split_node.args) > 2 else 0

backends/arm/_passes/replace_inf_values_pass.py

Lines changed: 45 additions & 0 deletions

@@ -0,0 +1,45 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This pass is based on backends/qualcomm/_passes/replace_inf_values.py
+# with some modifications to the replaced inf values.
+
+import torch
+from executorch.exir.pass_base import ExportPass, PassResult
+
+
+class ReplaceInfValues(ExportPass):
+    """
+    Due to limitations in the Quantizer, we need to change inf/-inf to more quantizable values.
+    """
+
+    def __init__(self):
+        super(ReplaceInfValues, self).__init__()
+
+    def call(self, graph_module: torch.fx.GraphModule):
+        modified = False
+        for buf_name, tensor in graph_module.named_buffers():
+            if tensor.is_floating_point():
+                modified = True
+                # 255 here is mainly for attention_mask in Llama for a reasonable quant scale
+                tensor[tensor == float("inf")] = 255
+                tensor[tensor == float("-inf")] = -255
+                setattr(graph_module, buf_name, tensor)
+
+        for node in graph_module.graph.nodes:
+            arg_list = list(node.args)
+            for index, arg in enumerate(arg_list):
+                if arg == float("-inf"):
+                    modified = True
+                    arg_list[index] = -255
+                elif arg == float("inf"):
+                    modified = True
+                    arg_list[index] = +255
+            node.args = tuple(arg_list)
+
+        if modified:
+            graph_module.recompile()
+        return PassResult(graph_module, modified)
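
As a quick usage sketch, assuming an executorch install: the pass can be exercised on a traced GraphModule that carries a -inf literal. The import path follows the __init__.py change above; the toy MaskedScores module is illustrative, not from the commit.

# Sketch only: exercises ReplaceInfValues via the import added to
# backends/arm/_passes/__init__.py above; MaskedScores is a made-up toy.
import torch
from executorch.backends.arm._passes import ReplaceInfValues


class MaskedScores(torch.nn.Module):
    def forward(self, scores: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
        # The -inf literal is captured as a constant arg on the masked_fill node.
        return scores.masked_fill(mask, float("-inf"))


gm = torch.fx.symbolic_trace(MaskedScores())
result = ReplaceInfValues().call(gm)
print(result.modified)           # True: the -inf arg was rewritten
print(result.graph_module.code)  # masked_fill now uses -255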

backends/arm/operator_support/slice_copy_support.py

Lines changed: 2 additions & 3 deletions

@@ -12,7 +12,6 @@
     SupportedTOSAOperatorCheck,
 )
 from executorch.backends.arm.tosa_specification import TosaSpecification
-from executorch.backends.arm.tosa_utils import getNodeArgs
 from executorch.exir.dialects._ops import ops as exir_ops
 
 logger = logging.getLogger(__name__)
@@ -33,8 +32,8 @@ def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification) ->
         if tosa_spec not in self.tosa_specs:
             return False
 
-        inputs = getNodeArgs(node)
-        if len(inputs) == 5 and (step := inputs[4].number) != 1:
+        args = node.args
+        if len(args) == 5 and (step := args[4]) != 1:
             logging.warning(f"{node.target} with step size of {step} not supported.")
             return False
         return True

backends/arm/operator_support/tosa_supported_operators.py

Lines changed: 2 additions & 0 deletions

@@ -194,6 +194,7 @@ def is_node_supported(
             exir_ops.edge.aten.mul.Tensor,
             exir_ops.edge.aten.ne.Tensor,
             exir_ops.edge.aten.ne.Scalar,
+            exir_ops.edge.aten.neg.default,
             exir_ops.edge.aten.add.Scalar,
             exir_ops.edge.aten.sub.Scalar,
             exir_ops.edge.aten.mul.Scalar,
@@ -311,6 +312,7 @@ class CheckProperQuantization(OperatorSupportBase):
             exir_ops.edge.aten.max_pool2d_with_indices.default,
             exir_ops.edge.aten.mm.default,
             exir_ops.edge.aten.mul.Tensor,
+            exir_ops.edge.aten.neg.default,
             exir_ops.edge.aten.relu.default,
             exir_ops.edge.aten.sub.Tensor,
             exir_ops.edge.aten.upsample_bilinear2d.vec,

backends/arm/operators/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -31,6 +31,7 @@
     op_maximum,
     op_minimum,
     op_mul,
+    op_neg,
    op_permute,
    op_pow,
    op_reciprocal,

backends/arm/operators/op_abs.py

Lines changed: 9 additions & 0 deletions

@@ -13,6 +13,9 @@
     NodeVisitor,
     register_node_visitor,
 )
+from executorch.backends.arm.operators.operator_validation_utils import (
+    validate_num_inputs,
+)
 from executorch.backends.arm.tosa_mapping import TosaArg
 from executorch.backends.arm.tosa_specification import TosaSpecification
 from torch.fx import Node
@@ -39,6 +42,7 @@ def define_node(
 
         import tosa_tools.v0_80.serializer.tosa_serializer as ts  # type: ignore
 
+        validate_num_inputs(self.target, inputs, 1)
         # Specification (0.80) states that input and output types
         # should all be the same
         if not (inputs[0].dtype == output.dtype):
@@ -105,6 +109,7 @@ def define_node(
 
         import tosa_tools.v0_80.serializer.tosa_serializer as ts  # type: ignore
 
+        validate_num_inputs(self.target, inputs, 1)
         # Specification (0.80) states that input and output types
         # should all be the same
         if not (inputs[0].dtype == output.dtype):
@@ -157,6 +162,8 @@ def define_node(
 
         import serializer.tosa_serializer as ts  # type: ignore
 
+        validate_num_inputs(self.target, inputs, 1)
+
         # Specification (1.0) states that input and output types
         # should all be the same
         if not (inputs[0].dtype == output.dtype):
@@ -224,6 +231,8 @@ def define_node(
 
         import serializer.tosa_serializer as ts  # type: ignore
 
+        validate_num_inputs(self.target, inputs, 1)
+
         # Specification (1.0) states that input and output types
         # should all be the same
         if not (inputs[0].dtype == output.dtype):

backends/arm/operators/op_add.py

Lines changed: 9 additions & 0 deletions

@@ -14,6 +14,9 @@
     NodeVisitor,
     register_node_visitor,
 )
+from executorch.backends.arm.operators.operator_validation_utils import (
+    validate_num_inputs,
+)
 from executorch.backends.arm.tosa_mapping import TosaArg
 from executorch.backends.arm.tosa_specification import TosaSpecification
 from torch.fx import Node
@@ -40,6 +43,7 @@ def define_node(
 
         import tosa_tools.v0_80.serializer.tosa_serializer as ts  # type: ignore
 
+        validate_num_inputs(self.target, inputs, 2)
         # Specification (0.80) states that input and output types
         # should all be the same
         if inputs[0].dtype != inputs[1].dtype or inputs[0].dtype != output.dtype:
@@ -118,6 +122,7 @@ def define_node(
 
         import tosa_tools.v0_80.serializer.tosa_serializer as ts  # type: ignore
 
+        validate_num_inputs(self.target, inputs, 2)
         # Specification (0.80) states that input and output types
         # should all be the same
         if inputs[0].dtype != inputs[1].dtype or inputs[0].dtype != output.dtype:
@@ -169,6 +174,8 @@ def define_node(
 
         import serializer.tosa_serializer as ts  # type: ignore
 
+        validate_num_inputs(self.target, inputs, 2)
+
         # Specification (1.0) states that input and output types
         # should all be the same
         if inputs[0].dtype != inputs[1].dtype or inputs[0].dtype != output.dtype:
@@ -237,6 +244,8 @@ def define_node(
 
         import serializer.tosa_serializer as ts  # type: ignore
 
+        validate_num_inputs(self.target, inputs, 2)
+
         # Specification (1.0) states that input and output types
         # should all be the same
         if inputs[0].dtype != inputs[1].dtype or inputs[0].dtype != output.dtype:

backends/arm/operators/op_amax.py

Lines changed: 7 additions & 0 deletions

@@ -9,6 +9,9 @@
     NodeVisitor,
     register_node_visitor,
 )
+from executorch.backends.arm.operators.operator_validation_utils import (
+    validate_num_inputs,
+)
 from executorch.backends.arm.tosa_mapping import TosaArg
 from torch.fx import Node
 
@@ -31,6 +34,8 @@ def define_node(
     ) -> None:
         import tosa_tools.v0_80.serializer.tosa_serializer as ts
 
+        validate_num_inputs(self.target, inputs, 3)
+
         input = inputs[0]
         dim = inputs[1].number
 
@@ -71,6 +76,8 @@ def define_node(
     ) -> None:
         import serializer.tosa_serializer as ts
 
+        validate_num_inputs(self.target, inputs, 3)
+
         input = inputs[0]
         dim = inputs[1].number

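The validate_num_inputs helper that op_abs, op_add, and op_amax now call is not itself part of this diff. Judging purely from the call sites above, a plausible sketch follows; the signature is taken from the calls, while the raise-on-mismatch behavior and message are assumptions about the real helper in operator_validation_utils.

# Hypothetical reconstruction, inferred only from the call sites above;
# the real operator_validation_utils.validate_num_inputs may differ.
from typing import Any, Sequence


def validate_num_inputs(op_target: str, inputs: Sequence[Any], expected: int) -> None:
    # Fail fast with a clear error instead of an IndexError deeper in define_node.
    if len(inputs) != expected:
        raise ValueError(
            f"{op_target}: expected {expected} input(s), got {len(inputs)}"
        )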