Commit 5f0000e

Merge branch 'main' into addFake
2 parents 5bfb411 + dd9a85a commit 5f0000e

61 files changed (+901, -552 lines)


.ci/scripts/unittest-macos.sh

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@ if [[ "$BUILD_TOOL" == "cmake" ]]; then
   .ci/scripts/unittest-macos-cmake.sh
 elif [[ "$BUILD_TOOL" == "buck2" ]]; then
   .ci/scripts/unittest-buck2.sh
-  .ci/scripts/unittest-macos-buck2.sh
+  # .ci/scripts/unittest-macos-buck2.sh
 else
   echo "Unknown build tool $BUILD_TOOL"
   exit 1

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@ cmake-android-out/
 cmake-ios-out/
 cmake-out*
 cmake-out-android/
+dist/
 ethos-u-scratch/
 executorch.egg-info
 pip-out/

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
@@ -922,6 +922,10 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
   endif()
 endif()
 
+if(EXECUTORCH_BUILD_COREML)
+  list(APPEND _executor_runner_libs coremldelegate)
+endif()
+
 add_executable(executor_runner ${_executor_runner__srcs})
 if(CMAKE_BUILD_TYPE STREQUAL "Release")
   if(APPLE)

backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h

Lines changed: 1 addition & 1 deletion
@@ -71,7 +71,7 @@ __attribute__((objc_subclassing_restricted))
 /// @param error On failure, error is filled with the failure information.
 /// @retval `YES` if the execution succeeded otherwise `NO`.
 - (BOOL)executeModelWithHandle:(ModelHandle*)handle
-                       argsVec:(const std::vector<executorchcoreml::MultiArray>&)argsVec
+                       argsVec:(std::vector<executorchcoreml::MultiArray>&)argsVec
                 loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions
                    eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable)eventLogger
                          error:(NSError* __autoreleasing*)error;

backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm

Lines changed: 7 additions & 1 deletion
@@ -734,7 +734,7 @@ - (BOOL)executeModelWithHandle:(ModelHandle *)handle
 }
 
 - (BOOL)executeModelWithHandle:(ModelHandle *)handle
-                       argsVec:(const std::vector<executorchcoreml::MultiArray>&)argsVec
+                       argsVec:(std::vector<executorchcoreml::MultiArray>&)argsVec
                 loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions
                    eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable)eventLogger
                          error:(NSError * __autoreleasing *)error {
@@ -785,6 +785,12 @@ - (BOOL)executeModelWithHandle:(ModelHandle *)handle
         return NO;
     }
 
+    // Resize for dynamic shapes
+    for (int i = 0; i < outputArgs.size(); i++) {
+        auto new_size = to_vector<size_t>(modelOutputs[i].shape);
+        outputArgs[i].resize(new_size);
+        argsVec[model.orderedInputNames.count + i].resize(new_size);
+    }
     ::set_outputs(outputArgs, modelOutputs);
     return YES;
 }

backends/apple/coreml/runtime/delegate/backend_delegate.h

Lines changed: 1 addition & 1 deletion
@@ -89,7 +89,7 @@ class BackendDelegate {
     /// @param error On failure, error is filled with the failure information.
     /// @retval `true` if the execution succeeded otherwise `false`.
     virtual bool execute(Handle* handle,
-                         const std::vector<MultiArray>& args,
+                         std::vector<MultiArray>& args,
                          const ModelLoggingOptions& logging_options,
                          ModelEventLogger* event_logger,
                          std::error_code& error) const noexcept = 0;

backends/apple/coreml/runtime/delegate/backend_delegate.mm

Lines changed: 3 additions & 3 deletions
@@ -104,7 +104,7 @@ - (ModelHandle*)loadModelFromAOTData:(NSData*)data
                                error:(NSError* __autoreleasing*)error;
 
 - (BOOL)executeModelWithHandle:(ModelHandle*)handle
-                       argsVec:(const std::vector<executorchcoreml::MultiArray>&)argsVec
+                       argsVec:(std::vector<executorchcoreml::MultiArray>&)argsVec
                 loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions
                    eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable)eventLogger
                          error:(NSError* __autoreleasing*)error;
@@ -199,7 +199,7 @@ - (ModelHandle*)loadModelFromAOTData:(NSData*)data
 }
 
 - (BOOL)executeModelWithHandle:(ModelHandle*)handle
-                       argsVec:(const std::vector<executorchcoreml::MultiArray>&)argsVec
+                       argsVec:(std::vector<executorchcoreml::MultiArray>&)argsVec
                 loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions
                    eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable)eventLogger
                          error:(NSError* __autoreleasing*)error {
@@ -286,7 +286,7 @@ explicit BackendDelegateImpl(const Config& config) noexcept
     }
 
     bool execute(Handle* handle,
-                 const std::vector<MultiArray>& args,
+                 std::vector<MultiArray>& args,
                  const ModelLoggingOptions& logging_options,
                  ModelEventLogger *event_logger,
                  std::error_code& ec) const noexcept override {

backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm

Lines changed: 20 additions & 0 deletions
@@ -12,13 +12,15 @@
 #import <coreml_backend/delegate.h>
 #import <executorch/runtime/core/evalue.h>
 #import <executorch/runtime/platform/log.h>
+#import <executorch/runtime/kernel/kernel_includes.h>
 #import <memory>
 #import <model_event_logger.h>
 #import <model_logging_options.h>
 #import <multiarray.h>
 #import <objc_safe_cast.h>
 #import <unordered_map>
 #import <vector>
+#include <array>
 
 #ifdef ET_EVENT_TRACER_ENABLED
 #import <model_event_logger_impl.h>
@@ -40,6 +42,9 @@
 using executorch::runtime::FreeableBuffer;
 using executorch::runtime::get_backend_class;
 using executorch::runtime::Result;
+using executorch::aten::SizesType;
+using executorch::aten::Tensor;
+using executorch::runtime::kTensorDimensionLimit;
 
 std::optional<MultiArray::DataType> get_data_type(ScalarType scalar_type) {
     switch (scalar_type) {
@@ -221,6 +226,21 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) {
              ETCoreMLStrings.delegateIdentifier.UTF8String);
 #endif
 
+    // Resize for dynamic shape
+    std::array<SizesType, kTensorDimensionLimit> new_shape;
+    for (size_t i = nInputs; i < nInputs + nOutputs; i++) {
+        Tensor& t = args[i]->toTensor();
+        int rank = delegate_args[i].layout().rank();
+        assert (rank <= new_shape.size());
+        for (int d = 0; d < rank; d++) {
+            new_shape[d] = delegate_args[i].layout().shape()[d];
+        }
+        ET_CHECK_OR_RETURN_ERROR(
+            resize_tensor(t, ArrayRef(new_shape.data(), rank)) == Error::Ok,
+            DelegateInvalidHandle,
+            "%s: Failed to resize delegate output %zu", ETCoreMLStrings.delegateIdentifier.UTF8String, i);
+    }
+
     return Error::Ok;
 }
 

backends/apple/coreml/runtime/delegate/multiarray.h

Lines changed: 7 additions & 0 deletions
@@ -84,6 +84,11 @@ class MultiArray final {
     /// Returns `true` if the memory layout is packed otherwise `false`.
     bool is_packed() const noexcept;
 
+    // Resizes memory layout
+    // New shape must be the same dimension and no larger than current shape in all dimensions
+    // New format is contiguous
+    void resize(const std::vector<size_t>& shape);
+
   private:
     DataType dataType_;
     std::vector<size_t> shape_;
@@ -126,6 +131,8 @@ class MultiArray final {
        *ptr = value;
    }
 
+    void resize(const std::vector<size_t>& shape) { layout_.resize(shape); }
+
   private:
    void* data(const std::vector<size_t>& indices) const noexcept;
 

backends/apple/coreml/runtime/delegate/multiarray.mm

Lines changed: 18 additions & 0 deletions
@@ -512,6 +512,24 @@ ssize_t get_data_offset(size_t index, const std::vector<size_t>& shape, const st
 
 namespace executorchcoreml {
 
+void MultiArray::MemoryLayout::resize(const std::vector<size_t>& shape) {
+    assert(shape.size() == shape_.size());
+    for (int i = 0; i < shape.size(); ++i) {
+        assert (shape[i] >= 1);
+        assert(shape[i] <= shape_[i]);
+    }
+    int stride = 1;
+    for (int i = shape.size() - 1; i >= 0; --i) {
+        shape_[i] = shape[i];
+        strides_[i] = stride;
+        if (shape[i] > 1) {
+            stride *= shape[i];
+        }
+    }
+}
+
+
+
 size_t MultiArray::MemoryLayout::num_elements() const noexcept {
     if (shape_.size() == 0) {
         return 0;
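
The resize added above only shrinks the logical view of an existing allocation: it asserts that the new shape has the same rank and is no larger than the current shape in every dimension, then rewrites the strides so the layout is contiguous over the new shape. A minimal Python sketch of that stride recomputation, for illustration only (the authoritative version is the C++ above):

# Sketch of MultiArray::MemoryLayout::resize's stride logic (illustrative).
def resize_layout(shape, new_shape):
    assert len(new_shape) == len(shape)
    assert all(1 <= n <= s for n, s in zip(new_shape, shape))
    strides = [0] * len(new_shape)
    stride = 1
    # Walk dimensions innermost-first, assigning contiguous strides;
    # size-1 dimensions do not grow the stride.
    for i in range(len(new_shape) - 1, -1, -1):
        strides[i] = stride
        if new_shape[i] > 1:
            stride *= new_shape[i]
    return list(new_shape), strides

# Mirrors the testResize case below: (3, 1, 2, 5) resized to (3, 1, 1, 1).
print(resize_layout([3, 1, 2, 5], [3, 1, 1, 1]))  # ([3, 1, 1, 1], [1, 1, 1, 1])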

backends/apple/coreml/runtime/test/BackendDelegateTests.mm

Lines changed: 5 additions & 3 deletions
@@ -162,8 +162,9 @@ - (void)testAddModelExecution {
     MLMultiArray *output = [ETCoreMLTestUtils filledMultiArrayWithShape:inputs[0].shape dataType:inputs[0].dataType repeatedValue:@(0) error:&localError];
     NSArray<MLMultiArray *> *args = [inputs arrayByAddingObject:output];
     std::error_code errorCode;
+    auto argsVec = to_multiarrays(args);
     XCTAssertTrue(_delegate->execute(handle,
-                                     to_multiarrays(args),
+                                     argsVec,
                                      ModelLoggingOptions(),
                                      nullptr,
                                      errorCode));
@@ -187,8 +188,9 @@ - (void)testMulModelExecution {
     MLMultiArray *output = [ETCoreMLTestUtils filledMultiArrayWithShape:inputs[0].shape dataType:inputs[0].dataType repeatedValue:@(0) error:&localError];
     NSArray<MLMultiArray *> *args = [inputs arrayByAddingObject:output];
     std::error_code errorCode;
-    XCTAssertTrue(_delegate->execute(handle,
-                                     to_multiarrays(args),
+    auto argsVec = to_multiarrays(args);
+    XCTAssertTrue(_delegate->execute(handle,
+                                     argsVec,
                                      ModelLoggingOptions(),
                                      nullptr,
                                      errorCode));

backends/apple/coreml/runtime/test/MultiArrayTests.mm

Lines changed: 16 additions & 0 deletions
@@ -130,4 +130,20 @@ - (void)testNonAdjacentDataCopy {
     [self verifyDataCopyWithShape:shape srcStrides:srcStrides dstStrides:dstStrides];
 }
 
+- (void)testResize {
+    std::vector<size_t> shape = {3, 1, 2, 5};
+    std::vector<ssize_t> strides = {1*2*5, 2*5, 5, 1};
+    std::vector<uint8_t> storage;
+    std::vector<size_t> newShape = {3, 1, 1, 1};
+
+    auto array = make_multi_array_and_fill<int>(shape, strides, storage);
+    for (size_t i = 0; i < array.layout().rank(); ++i) {
+        XCTAssertEqual(array.layout().shape()[i], shape[i]);
+    }
+    array.resize(newShape);
+    for (size_t i = 0; i < array.layout().rank(); ++i) {
+        XCTAssertEqual(array.layout().shape()[i], newShape[i]);
+    }
+}
+
 @end

backends/apple/coreml/runtime/workspace/executorchcoreml.xcodeproj/xcshareddata/xcschemes/executorchcoreml_tests.xcscheme

Lines changed: 5 additions & 0 deletions
@@ -23,6 +23,11 @@
                BlueprintName = "executorchcoreml_tests"
                ReferencedContainer = "container:executorchcoreml.xcodeproj">
             </BuildableReference>
+            <SkippedTests>
+               <Test
+                  Identifier = "ETCoreMLModelDebuggerTests/testMV3ProgramDebugging">
+               </Test>
+            </SkippedTests>
          </TestableReference>
       </Testables>
    </TestAction>

backends/arm/_passes/arm_pass_manager.py

Lines changed: 12 additions & 5 deletions
@@ -44,8 +44,9 @@
 from executorch.backends.arm._passes.decompose_select import ( # type: ignore[import-not-found]
     DecomposeSelectPass,
 )
-from executorch.backends.arm._passes.decompose_softmaxes_pass import (
-    DecomposeSoftmaxesPass,
+from executorch.backends.arm._passes.decompose_softmax_pass import DecomposeSoftmaxPass
+from executorch.backends.arm._passes.decompose_softmax_unstable_pass import (
+    DecomposeSoftmaxUnstablePass,
 )
 from executorch.backends.arm._passes.decompose_var_pass import DecomposeVarPass
 from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
@@ -81,7 +82,7 @@
 from executorch.backends.arm._passes.unsqueeze_scalar_placeholders_pass import (
     UnsqueezeScalarPlaceholdersPass,
 )
-from executorch.backends.arm.tosa_specification import TosaSpecification
+from executorch.backends.arm.tosa_specification import Tosa_0_80, TosaSpecification
 from executorch.backends.transforms.fuse_view_copy import FuseViewCopyTransform
 
 from executorch.backends.transforms.replace_scalar_with_tensor import (
@@ -155,7 +156,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(DecomposeMeanDimPass())
         self.add_pass(ConvertMeanDimToAveragePoolPass())
         self.add_pass(DecomposeDivPass())
-        self.add_pass(DecomposeSoftmaxesPass())
+        self.add_pass(DecomposeSoftmaxPass())
         self.add_pass(ConvertFullLikeToFullPass())
         self.add_pass(ConvertToClampPass())
         self.add_pass(ConvertMinMaxPass())
@@ -204,6 +205,12 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(DecomposeVarPass())
         self.add_pass(DecomposeMeanDimPass())
         self.add_pass(DecomposeDivPass())
-        self.add_pass(DecomposeSoftmaxesPass())
+
+        if isinstance(self.tosa_spec, Tosa_0_80) and self.tosa_spec.is_U55_subset:
+            # Numerically stable softmax uses amax which is not supported on Ethos-U55
+            self.add_pass(DecomposeSoftmaxUnstablePass())
+        else:
+            self.add_pass(DecomposeSoftmaxPass())
+
         self.add_pass(ConvertMinMaxPass())
         return self._transform(graph_module)
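
The Ethos-U55 special case above exists because the numerically stable decomposition subtracts the row-wise amax before exponentiating, and amax is not supported on that target; the unstable variant skips that step. A small Python illustration of the difference (an assumption about what the two decompositions compute, not code from this commit):

# Why the stable decomposition subtracts amax first: without it, exp() overflows
# for large logits. Illustrative only; the passes emit graph ops, not eager torch.
import torch

x = torch.tensor([[1000.0, 1001.0, 1002.0]])
dim = -1

# Unstable form: exp / sum(exp); exp(1000) overflows to inf and the result is nan.
unstable = torch.exp(x) * torch.reciprocal(torch.exp(x).sum(dim, keepdim=True))
print(unstable)  # tensor([[nan, nan, nan]])

# Stable form: shift by the row maximum before exponentiating.
shifted = x - torch.amax(x, dim, keepdim=True)
stable = torch.exp(shifted) * torch.reciprocal(torch.exp(shifted).sum(dim, keepdim=True))
print(stable)  # tensor([[0.0900, 0.2447, 0.6652]])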

backends/arm/_passes/decompose_softmax_pass.py (new file)

Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass
+
+# For BI case
+torch_softmax = (torch.ops.aten.softmax.int, torch.ops.aten.log_softmax.int)
+# For MI case
+edge_softmax = (
+    exir_ops.edge.aten._softmax.default,
+    exir_ops.edge.aten._log_softmax.default,
+)
+log_softmax = (torch.ops.aten.log_softmax.int, exir_ops.edge.aten._log_softmax.default)
+
+
+def _get_logsoftmax_ops(op) -> tuple:
+    """
+    Returns the (log_op, sub_op, amax_op, expo_op, sum_op, reciprocal_op), where the ops depends on if
+    the softmax op is an aten or edge op.
+    """
+    if op in edge_softmax:
+        return (
+            exir_ops.edge.aten.log.default,
+            exir_ops.edge.aten.sub.Tensor,
+            exir_ops.edge.aten.amax.default,
+            exir_ops.edge.aten.exp.default,
+            exir_ops.edge.aten.sum.dim_IntList,
+            exir_ops.edge.aten.reciprocal.default,
+            exir_ops.edge.aten.mul.Tensor,
+        )
+    if op in torch_softmax:
+        return (
+            torch.ops.aten.log.default,
+            torch.ops.aten.sub.Tensor,
+            torch.ops.aten.amax.default,
+            torch.ops.aten.exp.default,
+            torch.ops.aten.sum.dim_IntList,
+            torch.ops.aten.reciprocal.default,
+            torch.ops.aten.mul.Tensor,
+        )
+    raise RuntimeError(f"Can't get logsoftmax decomposition ops for op {op}")
+
+
+class DecomposeSoftmaxPass(ExportPass):
+    """
+    This pass decomposes log_softmax or softmax into more primitive ops.
+    Example:
+        %op1 = amax(x)
+        %op2 = sub(x, %op1)
+        %op3 = exp(%op2)
+        %op4 = sum(%op3, dim)
+        %op5 = reciprocal(%op4)
+        %op6 = mul(%op3, %op5)
+        (in logsoftmax case: %op7 = log(%op6))
+    """
+
+    def call_operator(self, op, args, kwargs, meta):
+        if op not in torch_softmax + edge_softmax:
+            return super().call_operator(op, args, kwargs, meta)
+        log_op, sub_op, max_op, exp_op, sum_op, reciprocal_op, mul_op = (
+            _get_logsoftmax_ops(op)
+        )
+        _input = args[0]
+        dim = [args[1]]
+        op1 = super().call_operator(max_op, (_input, dim, True), {}, meta)
+        op2 = super().call_operator(sub_op, (_input, op1), {}, meta)
+        op3 = super().call_operator(exp_op, (op2,), {}, meta)
+        op4 = super().call_operator(sum_op, (op3, dim, True), {}, meta)
+        op5 = super().call_operator(reciprocal_op, (op4,), {}, meta)
+        op6 = super().call_operator(mul_op, (op3, op5), {}, meta)
+        if op in log_softmax:
+            op6 = super().call_operator(log_op, (op6,), {}, meta)
+        return op6
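
The class docstring's op sequence maps one-to-one onto plain tensor operations. As a quick sanity check (not part of the patch; it uses eager torch rather than the pass infrastructure), the decomposition reproduces torch.softmax and torch.log_softmax:

# The amax/sub/exp/sum/reciprocal/mul chain emitted by DecomposeSoftmaxPass,
# replayed with eager torch ops and compared against the fused operators.
import torch

x = torch.randn(2, 5)
dim = 1

op1 = torch.amax(x, dim, keepdim=True)    # %op1 = amax(x)
op2 = x - op1                             # %op2 = sub(x, %op1)
op3 = torch.exp(op2)                      # %op3 = exp(%op2)
op4 = op3.sum(dim, keepdim=True)          # %op4 = sum(%op3, dim)
op5 = torch.reciprocal(op4)               # %op5 = reciprocal(%op4)
op6 = op3 * op5                           # %op6 = mul(%op3, %op5)

assert torch.allclose(op6, torch.softmax(x, dim), atol=1e-6)
assert torch.allclose(torch.log(op6), torch.log_softmax(x, dim), atol=1e-6)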

backends/arm/_passes/decompose_softmaxes_pass.py renamed to backends/arm/_passes/decompose_softmax_unstable_pass.py

Lines changed: 2 additions & 3 deletions
@@ -1,5 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
-# All rights reserved.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -46,7 +45,7 @@ def get_logsoftmax_ops(op) -> tuple:
     raise RuntimeError(f"Can't get softmax decomposition ops for op {op}")
 
 
-class DecomposeSoftmaxesPass(ExportPass):
+class DecomposeSoftmaxUnstablePass(ExportPass):
     """
     This pass decomposes log softmax or softmax into more primitive ops.
 