Commit f513b6e

Update base for Update on "Introduce data schema to store raw tensors"
Differential Revision: [D65156641](https://our.internmc.facebook.com/intern/diff/D65156641) [ghstack-poisoned]
2 parents: 3b25b05 + f813b6a

136 files changed: +5754 −719 lines changed


.github/workflows/_android.yml

Lines changed: 10 additions & 0 deletions
@@ -66,6 +66,16 @@ jobs:
         # avoid permission issue
         sudo chown -R "${USER}" /opt/android
 
+    - name: Download Artifacts
+      shell: bash
+      run: |
+        set -eux
+        curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/llm_demo/app-debug.apk
+        curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/llm_demo/app-debug-androidTest.apk
+        curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/fp32-xnnpack-custom/model.zip
+        unzip model.zip
+        mv *.pte model.pte
+
     - name: Gradle cache
       uses: gradle/actions/setup-gradle@v3

.github/workflows/ghstack_land.yml

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@ on:
       - 'gh/kimishpatel/[0-9]+/base'
       - 'gh/kirklandsign/[0-9]+/base'
       - 'gh/larryliu0820/[0-9]+/base'
+      - 'gh/lucylq/[0-9]+/base'
       - 'gh/manuelcandales/[0-9]+/base'
       - 'gh/mcr229/[0-9]+/base'
       - 'gh/swolchok/[0-9]+/base'

.github/workflows/pull.yml

Lines changed: 17 additions & 46 deletions
@@ -35,9 +35,6 @@ jobs:
     name: test-setup-linux-gcc
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     strategy:
-      matrix:
-        include:
-          - build-tool: cmake
       fail-fast: false
     with:
       runner: linux.2xlarge
@@ -50,7 +47,7 @@
       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
       conda activate "${CONDA_ENV}"
 
-      BUILD_TOOL=${{ matrix.build-tool }}
+      BUILD_TOOL="cmake"
 
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
       # Build and test ExecuTorch with the add model on portable backend.
@@ -89,20 +86,11 @@
     strategy:
       matrix:
         dtype: [fp32]
-        build-tool: [buck2, cmake]
         mode: [portable, xnnpack+custom, xnnpack+custom+qe]
         include:
           - dtype: bf16
-            build-tool: cmake
-            mode: portable
-          - dtype: bf16
-            build-tool: buck2
             mode: portable
           - dtype: bf16
-            build-tool: cmake
-            mode: custom
-          - dtype: bf16
-            build-tool: buck2
             mode: custom
       fail-fast: false
     with:
@@ -111,29 +99,30 @@
       submodules: 'true'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 900
+      upload-artifact: android-models
+      upload-artifact-to-s3: true
       script: |
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
 
         DTYPE=${{ matrix.dtype }}
-        BUILD_TOOL=${{ matrix.build-tool }}
+        BUILD_TOOL="cmake"
         MODE=${{ matrix.mode }}
+        ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}"
+        ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}"
 
         # Setup executorch
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
        # Install requirements for export_llama
         PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
         # Test llama2
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}" "${ARTIFACTS_DIR_NAME}"
 
   test-llama-runner-linux-android:
     name: test-llama-runner-linux-android
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     strategy:
-      matrix:
-        include:
-          - build-tool: cmake
       fail-fast: false
     with:
       runner: linux.2xlarge
@@ -146,18 +135,14 @@
       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
       conda activate "${CONDA_ENV}"
 
-      BUILD_TOOL=${{ matrix.build-tool }}
+      BUILD_TOOL="cmake"
       PYTHON_EXECUTABLE=python \
       bash .ci/scripts/build_llama_android.sh "${BUILD_TOOL}"
 
   test-custom-ops-linux:
     name: test-custom-ops-linux
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     strategy:
-      matrix:
-        include:
-          - build-tool: buck2
-          - build-tool: cmake
       fail-fast: false
     with:
       runner: linux.2xlarge
@@ -170,7 +155,7 @@
       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
       conda activate "${CONDA_ENV}"
 
-      BUILD_TOOL=${{ matrix.build-tool }}
+      BUILD_TOOL="cmake"
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
       # Test custom ops
       PYTHON_EXECUTABLE=python bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}"
@@ -179,10 +164,6 @@
     name: test-selective-build-linux
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     strategy:
-      matrix:
-        include:
-          - build-tool: buck2
-          - build-tool: cmake
       fail-fast: false
     with:
       runner: linux.2xlarge
@@ -195,7 +176,7 @@
       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
       conda activate "${CONDA_ENV}"
 
-      BUILD_TOOL=${{ matrix.build-tool }}
+      BUILD_TOOL="cmake"
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
       # Test selective build
       PYTHON_EXECUTABLE=python bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
@@ -235,9 +216,6 @@
     name: test-quantized-aot-lib-linux
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     strategy:
-      matrix:
-        include:
-          - build-tool: cmake
       fail-fast: false
     with:
       runner: linux.2xlarge
@@ -250,17 +228,14 @@
       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
       conda activate "${CONDA_ENV}"
 
-      BUILD_TOOL=${{ matrix.build-tool }}
+      BUILD_TOOL="cmake"
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
       PYTHON_EXECUTABLE=python bash examples/xnnpack/quantization/test_quantize.sh "${BUILD_TOOL}" mv2
 
   test-pybind-build-linux:
     name: test-pybind-build-linux
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     strategy:
-      matrix:
-        include:
-          - build-tool: cmake
       fail-fast: false
     with:
       runner: linux.2xlarge
@@ -274,7 +249,7 @@
       conda activate "${CONDA_ENV}"
 
       # build module for executorch.extension.pybindings.portable_lib
-      BUILD_TOOL=${{ matrix.build-tool }}
+      BUILD_TOOL="cmake"
       PYTHON_EXECUTABLE=python \
       EXECUTORCH_BUILD_XNNPACK=ON \
       EXECUTORCH_BUILD_PYBIND=ON \
@@ -349,6 +324,7 @@
 
   android:
     uses: ./.github/workflows/_android.yml
+    needs: test-llama-runner-linux
 
   unittest:
     uses: ./.github/workflows/_unittest.yml
@@ -357,10 +333,6 @@
 
   unittest-arm:
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
-    strategy:
-      matrix:
-        include:
-          - build-tool: buck2
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -374,7 +346,7 @@
       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
       conda activate "${CONDA_ENV}"
 
-      BUILD_TOOL=${{ matrix.build-tool }}
+      BUILD_TOOL="cmake"
 
       # Setup MacOS dependencies as there is no Docker support on MacOS atm
       PYTHON_EXECUTABLE=python \
@@ -396,7 +368,6 @@
     strategy:
       matrix:
         dtype: [fp32]
-        build-tool: [cmake]
         mode: [qnn]
       fail-fast: false
     with:
@@ -411,14 +382,14 @@
       conda activate "${CONDA_ENV}"
 
       DTYPE=${{ matrix.dtype }}
-      BUILD_TOOL=${{ matrix.build-tool }}
+      BUILD_TOOL="cmake"
       MODE=${{ matrix.mode }}
 
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
       PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
 
       # Setup executorch
-      PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
+      PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
       # Install requirements for export_llama
       PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
       # Test llama2

backends/apple/coreml/compiler/coreml_preprocess.py

Lines changed: 4 additions & 1 deletion
@@ -425,12 +425,15 @@ def preprocess(
             CoreMLBackend.op_linear_quantizer_config_from_compile_specs(compile_specs)
         )
 
+        # Load the model if MODEL_TYPE is 'COMPILED_MODEL'. This step is necessary because
+        # get_compiled_model_path() requires a loaded model.
+        skip_model_load = model_type != CoreMLBackend.MODEL_TYPE.COMPILED_MODEL
         mlmodel = ct.convert(
             model=edge_program,
             source="pytorch",
             convert_to="mlprogram",
             pass_pipeline=ct.PassPipeline.DEFAULT,
-            skip_model_load=True,
+            skip_model_load=skip_model_load,
             compute_precision=model_compute_precision,
             minimum_deployment_target=minimum_deployment_target,
             compute_units=compute_units,
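A note on the `skip_model_load` change above: `ct.convert` can defer instantiating the Core ML model, and the diff's comment states that `get_compiled_model_path()` only works on a loaded model. Below is a minimal standalone coremltools sketch of that idea, not ExecuTorch's code; the traced module, shapes, and the availability of `MLModel.get_compiled_model_path()` in the installed coremltools version are assumptions:

    # Standalone sketch; assumes torch and a recent coremltools are installed.
    import coremltools as ct
    import torch

    # A toy traced module standing in for the lowered program.
    example_module = torch.jit.trace(torch.nn.Linear(4, 4).eval(), torch.randn(1, 4))

    # skip_model_load=False loads the converted model, which is what makes the
    # compiled artifact queryable afterwards (per the comment in the diff).
    mlmodel = ct.convert(
        example_module,
        convert_to="mlprogram",
        inputs=[ct.TensorType(shape=(1, 4))],
        skip_model_load=False,
    )
    print(mlmodel.get_compiled_model_path())  # assumed API, per the diff's comment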

backends/apple/coreml/runtime/delegate/backend_delegate.h

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ class BackendDelegate {
         // Max models cache size in bytes.
         size_t max_models_cache_size = 10 * size_t(1024) * size_t(1024) * size_t(1024);
         // If set to `true`, delegate pre-warms the most recently used asset.
-        bool should_prewarm_asset = true;
+        bool should_prewarm_asset = false;
         // If set to `true`, delegate pre-warms the model in `init`.
         bool should_prewarm_model = true;
     };

backends/apple/coreml/runtime/delegate/com.apple.executorchcoreml_config.plist

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 <plist version="1.0">
 <dict>
     <key>shouldPrewarmAsset</key>
-    <true/>
+    <false/>
     <key>shouldPrewarmModel</key>
     <true/>
     <key>maxAssetsSizeInBytes</key>

backends/apple/coreml/runtime/test/CoreMLBackendDelegateTests.mm

Lines changed: 6 additions & 0 deletions
@@ -209,6 +209,12 @@ - (void)testStateProgramExecute {
 }
 #endif
 
+- (void)testAddMulCompiledProgramExecute {
+    NSURL *modelURL = [[self class] bundledResourceWithName:@"add_mul_compiled_coreml_all" extension:@"pte"];
+    XCTAssertNotNil(modelURL);
+    [self executeModelAtURL:modelURL nLoads:1 nExecutions:2];
+}
+
 - (void)executeMultipleModelsConcurrently:(NSArray<NSURL *> *)modelURLs
                                    nLoads:(NSUInteger)nLoads
                               nExecutions:(NSUInteger)nExecutions

backends/apple/coreml/runtime/workspace/executorchcoreml.xcodeproj/project.pbxproj

Lines changed: 4 additions & 0 deletions
@@ -8,6 +8,7 @@
 
 /* Begin PBXBuildFile section */
     8307EB8A2C9262060011AE6D /* state_coreml_all.pte in Resources */ = {isa = PBXBuildFile; fileRef = 8307EB892C9262060011AE6D /* state_coreml_all.pte */; };
+    838CA6872CD1965700462190 /* add_mul_compiled_coreml_all.pte in Resources */ = {isa = PBXBuildFile; fileRef = 838CA6862CD1965700462190 /* add_mul_compiled_coreml_all.pte */; };
     83BB78A02C65DA7300274ED7 /* ETCoreMLModelDebugInfo.mm in Sources */ = {isa = PBXBuildFile; fileRef = 83BB789F2C65DA7300274ED7 /* ETCoreMLModelDebugInfo.mm */; };
     83BB78BF2C66AAAE00274ED7 /* add_mul_coreml_all.bin in Resources */ = {isa = PBXBuildFile; fileRef = 83BB78BD2C66AAAE00274ED7 /* add_mul_coreml_all.bin */; };
     83BB78C02C66AAAE00274ED7 /* add_mul_coreml_all.pte in Resources */ = {isa = PBXBuildFile; fileRef = 83BB78BE2C66AAAE00274ED7 /* add_mul_coreml_all.pte */; };
@@ -122,6 +123,7 @@
 
 /* Begin PBXFileReference section */
     8307EB892C9262060011AE6D /* state_coreml_all.pte */ = {isa = PBXFileReference; lastKnownFileType = file; name = state_coreml_all.pte; path = ../test/models/state_coreml_all.pte; sourceTree = "<group>"; };
+    838CA6862CD1965700462190 /* add_mul_compiled_coreml_all.pte */ = {isa = PBXFileReference; lastKnownFileType = file; name = add_mul_compiled_coreml_all.pte; path = ../test/models/add_mul_compiled_coreml_all.pte; sourceTree = "<group>"; };
     83BB789E2C65DA7300274ED7 /* ETCoreMLModelDebugInfo.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = ETCoreMLModelDebugInfo.h; path = ../sdk/ETCoreMLModelDebugInfo.h; sourceTree = "<group>"; };
     83BB789F2C65DA7300274ED7 /* ETCoreMLModelDebugInfo.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; name = ETCoreMLModelDebugInfo.mm; path = ../sdk/ETCoreMLModelDebugInfo.mm; sourceTree = "<group>"; };
     83BB78BD2C66AAAE00274ED7 /* add_mul_coreml_all.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; name = add_mul_coreml_all.bin; path = ../test/models/add_mul_coreml_all.bin; sourceTree = "<group>"; };
@@ -606,6 +608,7 @@
     C98551992AD2542D009143F9 /* mul_coreml_all.bin */,
     C985519C2AD2542D009143F9 /* mul_coreml_all.pte */,
     C985519B2AD2542D009143F9 /* mv3_coreml_all.bin */,
+    838CA6862CD1965700462190 /* add_mul_compiled_coreml_all.pte */,
     C98551982AD2542D009143F9 /* mv3_coreml_all.pte */,
     83BB78BD2C66AAAE00274ED7 /* add_mul_coreml_all.bin */,
     83BB78BE2C66AAAE00274ED7 /* add_mul_coreml_all.pte */,
@@ -680,6 +683,7 @@
     C985519E2AD2542D009143F9 /* mv3_coreml_all.pte in Resources */,
     C98551A02AD2542D009143F9 /* add_coreml_all.bin in Resources */,
     C98551A22AD2542D009143F9 /* mul_coreml_all.pte in Resources */,
+    838CA6872CD1965700462190 /* add_mul_compiled_coreml_all.pte in Resources */,
     8307EB8A2C9262060011AE6D /* state_coreml_all.pte in Resources */,
     C98551A32AD2542D009143F9 /* add_coreml_all.pte in Resources */,
     );

backends/apple/coreml/scripts/generate_test_models.sh

Lines changed: 9 additions & 0 deletions
@@ -31,3 +31,12 @@ done
 
 echo "Executorch: Generating stateful model"
 python3 "$SCRIPT_DIR_PATH/../runtime/test/export_stateful_model.py"
+
+COMPILE_MODELS=("add_mul")
+echo "Executorch: Generating compiled model"
+for MODEL in "${COMPILE_MODELS[@]}"
+do
+  echo "Executorch: Generating compiled $MODEL model"
+  python3 -m examples.apple.coreml.scripts.export --model_name "$MODEL" --compile
+  mv -f "$MODEL""_compiled_coreml_all.pte" "$COREML_DIR_PATH/runtime/test/models"
+done

backends/arm/_passes/arm_pass_manager.py

Lines changed: 2 additions & 0 deletions
@@ -12,6 +12,7 @@
     AnnotateChannelsLastDimOrder,
 )
 from executorch.backends.arm._passes.cast_int64_pass import CastInt64ToInt32Pass
+from executorch.backends.arm._passes.conv1d_unsqueeze_pass import Conv1dUnsqueezePass
 from executorch.backends.arm._passes.convert_expand_copy_to_repeat import (
     ConvertExpandCopyToRepeatPass,
 )
@@ -69,6 +70,7 @@ def transform_to_backend_pipeline(
         self.add_pass(DecomposeDivPass())
         self.add_pass(InsertSqueezeAfterSumPass())
         self.add_pass(ConvertSplitToSlicePass())
+        self.add_pass(Conv1dUnsqueezePass(exported_program))
         self.add_pass(DecomposeSoftmaxesPass())
         for spec in compile_spec:
             if spec.key == "permute_memory_format":
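The implementation of the new `Conv1dUnsqueezePass` is not part of this diff; its name suggests the standard rewrite of a 1-D convolution as a 2-D convolution over inputs and weights unsqueezed with a trailing unit dimension. A minimal PyTorch sketch of that equivalence, as an illustration of the general technique rather than the pass itself:

    # Illustration only: conv1d expressed as conv2d via a trailing unit dimension.
    import torch
    import torch.nn.functional as F

    x = torch.randn(2, 3, 16)  # (N, C_in, L)
    w = torch.randn(4, 3, 5)   # (C_out, C_in, K)

    y1 = F.conv1d(x, w)                                          # (2, 4, 12)
    y2 = F.conv2d(x.unsqueeze(-1), w.unsqueeze(-1)).squeeze(-1)  # same values
    print(torch.allclose(y1, y2, atol=1e-6))                     # True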
