pytorch
diff --git a/‎.ci/scripts/build-qnn-sdk.sh
Lines changed: 1 addition & 0 deletions b/‎.ci/scripts/build-qnn-sdk.sh
Lines changed: 1 addition & 0 deletions
diff --git a/‎.ci/scripts/test_model.sh
Lines changed: 1 addition & 1 deletion b/‎.ci/scripts/test_model.sh
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/_link_check.yml
Lines changed: 41 additions & 0 deletions b/‎.github/workflows/_link_check.yml
Lines changed: 41 additions & 0 deletions
diff --git a/‎.github/workflows/lint.yml
Lines changed: 2 additions & 21 deletions b/‎.github/workflows/lint.yml
Lines changed: 2 additions & 21 deletions
diff --git a/‎.github/workflows/nightly.yml
Lines changed: 6 additions & 0 deletions b/‎.github/workflows/nightly.yml
Lines changed: 6 additions & 0 deletions
diff --git a/‎Package.swift
Lines changed: 3 additions & 1 deletion b/‎Package.swift
Lines changed: 3 additions & 1 deletion
diff --git a/‎README.md
Lines changed: 1 addition & 1 deletion b/‎README.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm
Lines changed: 15 additions & 1 deletion b/‎backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm
Lines changed: 15 additions & 1 deletion
diff --git a/‎backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm
Lines changed: 77 additions & 1 deletion b/‎backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm
Lines changed: 77 additions & 1 deletion
diff --git a/‎backends/apple/coreml/scripts/install_requirements.sh
Lines changed: 1 addition & 1 deletion b/‎backends/apple/coreml/scripts/install_requirements.sh
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/apple/mps/setup.md
Lines changed: 3 additions & 3 deletions b/‎backends/apple/mps/setup.md
Lines changed: 3 additions & 3 deletions
diff --git a/‎backends/arm/_passes/__init__.py
Lines changed: 1 addition & 0 deletions b/‎backends/arm/_passes/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/_passes/arm_pass_manager.py
Lines changed: 7 additions & 0 deletions b/‎backends/arm/_passes/arm_pass_manager.py
Lines changed: 7 additions & 0 deletions
diff --git a/‎backends/arm/_passes/decompose_softmax_pass.py
Lines changed: 5 additions & 1 deletion b/‎backends/arm/_passes/decompose_softmax_pass.py
Lines changed: 5 additions & 1 deletion
@@ -33,6 +33,7 @@ set_up_aot() {
   cmake .. \
       -DCMAKE_INSTALL_PREFIX=$PWD \
       -DEXECUTORCH_BUILD_QNN=ON \
+      -DANDROID_NATIVE_API_LEVEL=30 \
       -DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
       -DEXECUTORCH_BUILD_DEVTOOLS=ON \
       -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
 
@@ -222,7 +222,7 @@ test_model_with_coreml() {
 
   DTYPE=float16
 
-  "${PYTHON_EXECUTABLE}" -m examples.apple.coreml.scripts.export --model_name="${MODEL_NAME}" --compute_precision "${DTYPE}"
+  "${PYTHON_EXECUTABLE}" -m examples.apple.coreml.scripts.export --model_name="${MODEL_NAME}" --compute_precision "${DTYPE}" --use_partitioner
   EXPORTED_MODEL=$(find "." -type f -name "${MODEL_NAME}*.pte" -print -quit)
 
   if [ -n "$EXPORTED_MODEL" ]; then
 
@@ -0,0 +1,41 @@
+on:
+  workflow_call:
+    inputs:
+      ref:
+        type: string
+        required: true
+
+jobs:
+  # Runs scripts/lint_urls.sh, passing it the files changed by the triggering event.
+  lint-urls:
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-linter
+      submodules: 'none'
+      fetch-depth: 0
+      ref: ${{ inputs.ref }}
+      timeout: 90
+      script: |
+        # Changed files are passed for pull_request and push events; for any
+        # other event the script is invoked with no arguments.
+        # An explicit if/elif is required here: `a && b || c && d` groups as
+        # `((a && b) || c) && d`, so a successful pull_request diff would be
+        # followed by the push diff as well (with an empty "before" revision).
+        ./scripts/lint_urls.sh $(
+          if [ "${{ github.event_name }}" = "pull_request" ]; then
+            git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}
+          elif [ "${{ github.event_name }}" = "push" ]; then
+            git diff --name-only ${{ github.event.before }} ${{ github.sha }}
+          fi
+        )
+
+  # Runs scripts/lint_xrefs.sh, passing it the files changed by the triggering event.
+  lint-xrefs:
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-linter
+      submodules: 'none'
+      fetch-depth: 0
+      ref: ${{ inputs.ref }}
+      timeout: 90
+      script: |
+        # Changed files are passed for pull_request and push events; for any
+        # other event the script is invoked with no arguments.
+        # An explicit if/elif is required here: `a && b || c && d` groups as
+        # `((a && b) || c) && d`, so a successful pull_request diff would be
+        # followed by the push diff as well (with an empty "before" revision).
+        ./scripts/lint_xrefs.sh $(
+          if [ "${{ github.event_name }}" = "pull_request" ]; then
+            git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}
+          elif [ "${{ github.event_name }}" = "push" ]; then
+            git diff --name-only ${{ github.event.before }} ${{ github.sha }}
+          fi
+        )
@@ -64,29 +64,10 @@ jobs:
 
         exit $RC
 
-  lint-urls:
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+  link-check:
+    uses: ./.github/workflows/_link_check.yml
     with:
-      runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-linter
-      submodules: 'none'
-      fetch-depth: 0
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        ./scripts/lint_urls.sh
-
-  lint-xrefs:
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    with:
-      runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-linter
-      submodules: 'none'
-      fetch-depth: 0
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        ./scripts/lint_xrefs.sh
 
   android-java-format:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
 
@@ -30,3 +30,9 @@ jobs:
           test-infra-ref: main
           updatebot-token: ${{ secrets.UPDATEBOT_TOKEN }}
           pytorchbot-token: ${{ secrets.GH_PYTORCHBOT_TOKEN }}
+
+  link-check:
+    needs: update-pytorch-commit-hash
+    uses: ./.github/workflows/_link_check.yml
+    with:
+      ref: ${{ github.sha }}
@@ -77,7 +77,9 @@ let package = Package(
         name: "\(key)_dependencies",
         dependencies: [.target(name: key)],
         path: ".Package.swift/\(key)",
-        linkerSettings:
+        linkerSettings: [
+          .linkedLibrary("c++")
+        ] +
           (value["frameworks"] as? [String] ?? []).map { .linkedFramework($0) } +
           (value["libraries"] as? [String] ?? []).map { .linkedLibrary($0) }
       ),
 
@@ -51,7 +51,7 @@ To get started you can:
 
 - Visit the [Step by Step Tutorial](https://pytorch.org/executorch/stable/getting-started.html) to get things running locally and deploy a model to a device
 - Use this [Colab Notebook](https://colab.research.google.com/drive/1qpxrXC3YdJQzly3mRg-4ayYiOjC6rue3?usp=sharing) to start playing around right away
-- Jump straight into LLM use cases by following specific instructions for [Llama](examples/models/llama/README.md) and [Llava](examples/models/llava/README.md)
+- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), and [Llava](examples/models/llava/README.md)
 
 ## Feedback and Engagement
 
 
@@ -88,9 +88,17 @@
         ET_LOG(Error, "%s: DataType=%d is not supported", ETCoreMLStrings.delegateIdentifier.UTF8String, (int)tensor.scalar_type());
         return std::nullopt;
     }
-
+    
     std::vector<ssize_t> strides(tensor.strides().begin(), tensor.strides().end());
     std::vector<size_t> shape(tensor.sizes().begin(), tensor.sizes().end());
+    
+    // If tensor is rank 0, wrap in rank 1
+    // See https://github.com/apple/coremltools/blob/8.2/coremltools/converters/mil/frontend/torch/exir_utils.py#L73
+    if (shape.size() == 0) {
+        shape.push_back(1);
+        strides.push_back(1);
+    }
+    
     MultiArray::MemoryLayout layout(dataType.value(), std::move(shape), std::move(strides));
     switch (argType) {
         case ArgType::Input: {
@@ -233,6 +241,12 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) {
     std::array<SizesType, kTensorDimensionLimit> new_shape;
     for (size_t i = nInputs; i < nInputs + nOutputs; i++) {
         Tensor& t = args[i]->toTensor();
+        // If t has rank 0, do not resize.  delegate_args[i] will have rank 1
+        // because we resized it in get_multi_array
+        if (t.dim() == 0) {
+            continue;
+        }
+
         int rank = delegate_args[i].layout().rank();
         assert (rank <= new_shape.size());
         for (int d = 0; d < rank; d++) {
 
@@ -15,6 +15,9 @@
 #import <XCTest/XCTest.h>
 #import <executorch/runtime/platform/runtime.h>
 #import <model_logging_options.h>
+#import <multiarray.h>
+
+using namespace executorchcoreml;
 
 @interface ETCoreMLModelManagerTests : XCTestCase
 
@@ -110,7 +113,7 @@ - (void)testAddModelExecution {
     XCTAssertNotNil(inputs);
     MLMultiArray *output = [ETCoreMLTestUtils filledMultiArrayWithShape:inputs[0].shape dataType:inputs[0].dataType repeatedValue:@(0) error:&localError];
     NSArray<MLMultiArray *> *args = [inputs arrayByAddingObject:output];
-    XCTAssertTrue([self.modelManager executeModelWithHandle:handle 
+    XCTAssertTrue([self.modelManager executeModelWithHandle:handle
                                                        args:args
                                              loggingOptions:executorchcoreml::ModelLoggingOptions()
                                                 eventLogger:nullptr
@@ -148,4 +151,77 @@ - (void)testMulModelExecution {
     }
 }
 
+// Checks that executing a model with a mismatched input shape returns NO and sets an NSError. See https://github.com/pytorch/executorch/pull/10465
+- (void)testAutoreleasepoolError {
+    NSURL *modelURL = [self.class bundledResourceWithName:@"add_coreml_all" extension:@"bin"];
+    NSError *localError = nil;
+    XCTAssertNotNil(modelURL);
+
+    NSData *modelData = [NSData dataWithContentsOfURL:modelURL];
+    MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init];
+    configuration.computeUnits = MLComputeUnitsAll;
+    ModelHandle *modelHandle = [self.modelManager loadModelFromAOTData:modelData
+                                                           configuration:configuration
+                                                                   error:&localError];
+    XCTAssert(modelHandle);
+
+    ETCoreMLModel *model = [self.modelManager modelWithHandle:modelHandle];
+    XCTAssert(model);
+
+    NSArray<MLMultiArray *> *inputArrays =
+        [ETCoreMLTestUtils inputsForModel:model repeatedValues:@[@(2), @(3)] error:&localError];
+    XCTAssert(inputArrays);
+
+    std::vector<MultiArray> multiArrays; // argument list: wrapped inputs first, then one buffer per model output
+    multiArrays.reserve(inputArrays.count + model.orderedOutputNames.count);
+    for (MLMultiArray *array in inputArrays) {
+        auto dataTypeOpt = to_multiarray_data_type(array.dataType);
+        XCTAssert(dataTypeOpt.has_value());
+        auto dataType = dataTypeOpt.value();
+
+        std::vector<size_t> dims;
+        for (NSNumber *n in array.shape) {
+            dims.push_back(n.unsignedLongValue);
+        }
+
+        std::vector<ssize_t> strides(dims.size());
+        ssize_t currentStride = 1; // last dimension gets stride 1; each earlier one is the product of the later dims
+        for (NSInteger i = dims.size() - 1; i >= 0; --i) { // signed index so the countdown can stop below zero
+            strides[i] = currentStride;
+            currentStride *= dims[i];
+        }
+
+        multiArrays.emplace_back(array.dataPointer, // wraps the MLMultiArray's own storage; no copy is made here
+                                 MultiArray::MemoryLayout(dataType, dims, strides));
+    }
+
+    auto inputLayout = multiArrays[0].layout(); // every output buffer below reuses the first input's layout
+    size_t bufferSize = inputLayout.num_bytes();
+    for (NSUInteger i = 0; i < model.orderedOutputNames.count; ++i) {
+        multiArrays.emplace_back(calloc(1, bufferSize), inputLayout); // zero-filled; released by the free() loop at the end
+    }
+    // Corrupt the first input's shape (first dimension + 1, same data pointer and strides) to force an execution error
+    {
+        auto originalLayout = multiArrays[0].layout();
+        auto corruptedDims = originalLayout.shape();
+        corruptedDims[0] += 1;
+        multiArrays[0] = MultiArray(multiArrays[0].data(),
+                                    MultiArray::MemoryLayout(originalLayout.dataType(),
+                                                             corruptedDims,
+                                                             originalLayout.strides()));
+    }
+
+    BOOL success = [self.modelManager executeModelWithHandle:modelHandle
+                                                    argsVec:multiArrays
+                                             loggingOptions:ModelLoggingOptions()
+                                                eventLogger:nullptr
+                                                      error:&localError];
+    XCTAssertFalse(success); // the call is expected to fail...
+    XCTAssertNotNil(localError); // ...and to report the failure through the error out-parameter
+
+    for (size_t i = inputArrays.count; i < multiArrays.size(); ++i) { // skip the inputs: only the output buffers were calloc'd here
+        free(multiArrays[i].data());
+    }
+}
+
 @end
@@ -12,7 +12,7 @@ SCRIPT_DIR_PATH="$(
 
 # TODO(jathu): remove the need to fetch coremltools to build deps for coreml_executor_runner.
 # Keep this version in sync with: pyproject.toml
-COREMLTOOLS_VERSION="8.2"
+COREMLTOOLS_VERSION="8.3"
 
 red=`tput setaf 1`
 green=`tput setaf 2`
 
@@ -76,12 +76,12 @@ cd executorch
 ## Run the mv3 generated model using the mps_executor_runner
 
 ```bash
-./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_bundled_fp16.pte --bundled_program
+./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_float16_bundled.pte --bundled_program
 ```
 
 - You should see the following results. Note that no output file will be generated in this example:
 ```
-I 00:00:00.003290 executorch:mps_executor_runner.mm:286] Model file mv3_mps_bundled_fp16.pte is loaded.
+I 00:00:00.003290 executorch:mps_executor_runner.mm:286] Model file mv3_mps_float16_bundled.pte is loaded.
 I 00:00:00.003306 executorch:mps_executor_runner.mm:292] Program methods: 1
 I 00:00:00.003308 executorch:mps_executor_runner.mm:294] Running method forward
 I 00:00:00.003311 executorch:mps_executor_runner.mm:349] Setting up non-const buffer 1, size 606112.
@@ -118,7 +118,7 @@ python3 -m examples.apple.mps.scripts.mps_example --model_name="mv3" --generate_
 ```
 2. Run your Program on the ExecuTorch runtime and generate an [ETDump](../../../docs/source/etdump.md).
 ```
-./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_bundled_fp16.pte --bundled_program --dump-outputs
+./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_float16_bundled.pte --bundled_program --dump-outputs
 ```
 3. Create an instance of the Inspector API by passing in the ETDump you have sourced from the runtime along with the optionally generated ETRecord from step 1.
 ```bash
 
@@ -39,6 +39,7 @@
 )
 from .fuse_batchnorm2d_pass import FuseBatchnorm2DPass  # noqa
 from .fuse_constant_ops_pass import ComputeConstantOpsAOT, FuseConstantArgsPass  # noqa
+from .fuse_equal_placeholders_pass import FuseEqualPlaceholdersPass  # noqa
 from .fuse_quantized_activation_pass import FuseQuantizedActivationPass  # noqa
 from .insert_rescales_pass import InsertRescalePass  # noqa
 from .insert_table_ops import InsertTableOpsPass  # noqa
 
@@ -40,6 +40,7 @@
     FoldAndAnnotateQParamsPass,
     FuseBatchnorm2DPass,
     FuseConstantArgsPass,
+    FuseEqualPlaceholdersPass,
     FuseQuantizedActivationPass,
     InsertRescalePass,
     InsertTableOpsPass,
@@ -58,6 +59,9 @@
 )
 
 from executorch.backends.arm.tosa_specification import Tosa_0_80, TosaSpecification
+from executorch.backends.transforms.decompose_sdpa import (
+    DecomposeScaledDotProductAttention,
+)
 from executorch.backends.transforms.fuse_view_copy import FuseViewCopyTransform
 from executorch.backends.xnnpack._passes.remove_getitem_op import RemoveGetItemPass
 from executorch.exir import ExportedProgram
@@ -113,6 +117,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(FuseConstantArgsPass(exported_program))
 
         self.add_pass(InsertTableOpsPass(exported_program))
+        self.add_pass(FuseEqualPlaceholdersPass(exported_program))
         self.add_pass(AnnotateChannelsLastDimOrder())
         self.add_pass(InsertRescalePass())
 
@@ -164,6 +169,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(FuseViewCopyTransform())
         self.add_pass(FuseConstantArgsPass(exported_program))
         self.add_pass(InsertTableOpsPass(exported_program))
+        self.add_pass(FuseEqualPlaceholdersPass(exported_program))
         self.add_pass(AnnotateChannelsLastDimOrder())
         self.add_pass(InsertRescalePass())
 
@@ -191,6 +197,7 @@ def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
             )
 
     def transform_for_annotation_pipeline(self, graph_module: GraphModule):
+        self.add_pass(DecomposeScaledDotProductAttention())
         self.add_pass(ReplaceScalarWithTensorArgPassTOSABI())
         self.add_pass(ScalarsToAttributePass())
         self.add_pass(DecomposeLayerNormPass())
 
@@ -8,7 +8,11 @@
 from executorch.exir.pass_base import ExportPass
 
 # For BI case
-torch_softmax = (torch.ops.aten.softmax.int, torch.ops.aten.log_softmax.int)
+torch_softmax = (
+    torch.ops.aten.softmax.int,
+    torch.ops.aten._safe_softmax.default,
+    torch.ops.aten.log_softmax.int,
+)
 # For MI case
 edge_softmax = (
     exir_ops.edge.aten._softmax.default,
Original file line number	Diff line number	Diff line change
`@@ -39,6 +39,7 @@`
`39`	`39`	`)`
`40`	`40`	`from .fuse_batchnorm2d_pass import FuseBatchnorm2DPass # noqa`
`41`	`41`	`from .fuse_constant_ops_pass import ComputeConstantOpsAOT, FuseConstantArgsPass # noqa`
	`42`	`+from .fuse_equal_placeholders_pass import FuseEqualPlaceholdersPass # noqa`
`42`	`43`	`from .fuse_quantized_activation_pass import FuseQuantizedActivationPass # noqa`
`43`	`44`	`from .insert_rescales_pass import InsertRescalePass # noqa`
`44`	`45`	`from .insert_table_ops import InsertTableOpsPass # noqa`