Exploit the fact that the `unsqueeze` operation can be reduced to a `permute`:
```python
torch.all(torch.permute(x.unsqueeze(0), [1, 0, 2, 3]) == x.unsqueeze(1))
torch.all(torch.permute(x.unsqueeze(0), [1, 2, 0, 3]) == x.unsqueeze(2))
torch.all(torch.permute(x.unsqueeze(0), [1, 2, 3, 0]) == x.unsqueeze(3))
```
This diff introduces a minor change to the Permute implementation: it no longer requires the input rank to match the length of the permute array. This allows `unsqueeze(d)` to be implemented as a no-op `unsqueeze(0)` followed by a permute.
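For illustration, a minimal sketch of the reduction; the helper name `unsqueeze_via_permute` is hypothetical and not part of this diff:

```python
import torch

def unsqueeze_via_permute(x: torch.Tensor, dim: int) -> torch.Tensor:
    # Insert the new axis at position 0 (a no-op on the data layout),
    # then move it to `dim` with a permute.
    rank = x.dim()
    order = list(range(1, dim + 1)) + [0] + list(range(dim + 1, rank + 1))
    return torch.permute(x.unsqueeze(0), order)

x = torch.randn(2, 3, 4)
for d in range(x.dim() + 1):
    assert torch.equal(unsqueeze_via_permute(x, d), x.unsqueeze(d))
```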
Differential Revision: [D56347734](https://our.internmc.facebook.com/intern/diff/D56347734/)
The module will be fully or partially delegated to **Core ML**, depending on whether all or only some of its ops are supported by the **Core ML** backend. The user may force certain ops to be skipped via `CoreMLPartitioner(skip_ops_for_coreml_delegation=...)`.
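A minimal sketch of partitioner-based lowering is shown below; the import path and the `to_edge` flow are assumptions based on the ExecuTorch source layout, and `Model` is a toy module:

```python
import torch
import executorch.exir

# Assumed import path for the partitioner.
from executorch.backends.apple.coreml.partition.coreml_partitioner import (
    CoreMLPartitioner,
)

class Model(torch.nn.Module):
    def forward(self, x):
        return torch.sin(x) + 1.0

example_inputs = (torch.randn(4),)
exported_program = torch.export.export(Model().eval(), example_inputs)
edge_program_manager = executorch.exir.to_edge(exported_program)

# Ops supported by Core ML are delegated; everything else stays in the
# default ExecuTorch runtime. Ops can also be force-skipped, e.g.
# CoreMLPartitioner(skip_ops_for_coreml_delegation=["aten.sin.default"]).
delegated_program_manager = edge_program_manager.to_backend(CoreMLPartitioner())
executorch_program = delegated_program_manager.to_executorch()
```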
The `to_backend` implementation is a thin wrapper over [coremltools](https://apple.github.io/coremltools/docs-guides/); `coremltools` is responsible for converting an **ExportedProgram** to a **MLModel**. The converted **MLModel** data is saved, flattened, and returned as bytes to **ExecuTorch**.
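The sketch below illustrates the underlying `coremltools` conversion step on an exported toy model; it is an illustration of what the wrapper delegates to, not the backend's actual code path:

```python
import coremltools as ct
import torch

class Model(torch.nn.Module):
    def forward(self, x):
        return x * 2.0

example_inputs = (torch.randn(2, 3),)
exported_program = torch.export.export(Model().eval(), example_inputs)

# coremltools converts the ExportedProgram to an MLModel; the backend
# then serializes the MLModel data into the payload returned to ExecuTorch.
mlmodel = ct.convert(exported_program)
```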
## Quantization
To quantize a Program in a Core ML favored way, the client may utilize **CoreMLQuantizer**.
```python
import torch
import executorch.exir

from torch._export import capture_pre_autograd_graph
from torch.ao.quantization.quantize_pt2e import (
    convert_pt2e,
    prepare_pt2e,
    prepare_qat_pt2e,
)

from executorch.backends.apple.coreml.quantizer.coreml_quantizer import CoreMLQuantizer
from coremltools.optimize.torch.quantization.quantization_config import (
    LinearQuantizerConfig,
    QuantizationScheme,
)
```
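A minimal sketch of the prepare/convert flow, assuming a toy `Model` and illustrative `LinearQuantizerConfig` values (for quantization-aware training, `prepare_qat_pt2e` would replace `prepare_pt2e`):

```python
class Model(torch.nn.Module):
    def forward(self, x):
        return torch.nn.functional.relu(x)

example_inputs = (torch.randn(1, 8),)

# Illustrative config; see the coremltools docs for the full option set.
quantization_config = LinearQuantizerConfig.from_dict(
    {
        "global_config": {
            "quantization_scheme": QuantizationScheme.symmetric,
            "activation_dtype": torch.quint8,
            "weight_dtype": torch.qint8,
            "weight_per_channel": True,
        }
    }
)
quantizer = CoreMLQuantizer(quantization_config)

# Capture the eager model, insert observers, calibrate, then convert.
pre_autograd_graph = capture_pre_autograd_graph(Model().eval(), example_inputs)
prepared_graph = prepare_pt2e(pre_autograd_graph, quantizer)
prepared_graph(*example_inputs)  # run sample data through for calibration
converted_graph = convert_pt2e(prepared_graph)
```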
The `converted_graph` is the quantized torch model, and it can be delegated to **Core ML** in the same way through **CoreMLPartitioner**.
## Runtime
To execute a Core ML delegated program, the application must link to the `coremldelegate` library. Once linked, no additional steps are required: when running the program, ExecuTorch calls the Core ML runtime to execute the Core ML delegated parts of the program.
Please follow the instructions described in the [Core ML setup](/backends/apple/coreml/setup.md) to link the `coremldelegate` library.
## Help & Improvements
If you have problems or questions, or have suggestions for ways to make implementation and testing better, please create an issue on [github](https://github.com/pytorch/executorch/issues).