Add llama jobs on Arm64 and reduce llama jobs on macOS (#9251)

mergennachin · web-flow · commit 699ee7def80d · 2025-03-13T20:54:25.000-04:00
Reduce macOS llama runners

Add arm64 llama runners, distributed across the pull.yml and trunk.yml jobs.
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -136,6 +136,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
 
   test-llama-runner-linux:
+    # Test both Linux x86 and Linux aarch64
     name: test-llama-runner-linux
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:
@@ -144,21 +145,29 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        mode: [portable, xnnpack+custom, xnnpack+custom+qe,xnnpack+custom+quantize_kv,xnnpack+quantize_kv]
+        mode: [xnnpack+custom+qe,xnnpack+custom+quantize_kv,xnnpack+quantize_kv]
+        runner: [linux.2xlarge, linux.arm64.2xlarge]
+        docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64]
         include:
-          - dtype: bf16
-            mode: portable
           - dtype: bf16
             mode: custom
+            runner: linux.2xlarge
+            docker-image: executorch-ubuntu-22.04-clang12
+        # Excluding specific runner + docker image combinations that don't make sense:
+        #   - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge)
+        #   - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge)
+        exclude:
+          - runner: linux.2xlarge
+            docker-image: executorch-ubuntu-22.04-gcc11-aarch64
+          - runner: linux.arm64.2xlarge
+            docker-image: executorch-ubuntu-22.04-clang12
       fail-fast: false
     with:
-      runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-clang12
+      runner: ${{ matrix.runner }}
+      docker-image: ${{ matrix.docker-image }}
       submodules: 'true'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 900
-      upload-artifact: android-models
-      upload-artifact-to-s3: true
       script: |
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
@@ -283,18 +283,72 @@ jobs:
         # Test ANE llama
         ${CONDA_RUN} sh .ci/scripts/test_ane_static_llama.sh
 
-  test-llama-runner-macos:
-    name: test-llama-runner-mac
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+  test-llama-runner-linux:
+    # Run the llama runner test (stories110M, CMake build) on both Linux x86 and Linux aarch64
+    name: test-llama-runner-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
     strategy:
       matrix:
         dtype: [fp32]
-        mode: [portable, xnnpack+kv+custom, mps, coreml, xnnpack+custom+quantize_kv]
+        mode: [portable, xnnpack+custom]  # fp32 modes; bf16 jobs come from the include entries below
+        runner: [linux.2xlarge, linux.arm64.2xlarge]  # x86 runner and ARM64 runner
+        docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64]  # x86 clang image and ARM64 gcc image
         include:
           - dtype: bf16
             mode: portable
+            runner: linux.2xlarge
+            docker-image: executorch-ubuntu-22.04-clang12
+          - dtype: bf16
+            mode: portable
+            runner: linux.arm64.2xlarge
+            docker-image: executorch-ubuntu-22.04-gcc11-aarch64
           - dtype: bf16
             mode: custom
+            runner: linux.arm64.2xlarge  # in this job, bf16 + custom is only listed for the ARM64 runner
+            docker-image: executorch-ubuntu-22.04-gcc11-aarch64
+        # Exclude the mismatched runner + docker image pairs produced by the runner x docker-image matrix:
+        #   - the ARM64 gcc image on the x86 runner (linux.2xlarge)
+        #   - the x86 clang image on the ARM64 runner (linux.arm64.2xlarge)
+        exclude:
+          - runner: linux.2xlarge
+            docker-image: executorch-ubuntu-22.04-gcc11-aarch64
+          - runner: linux.arm64.2xlarge
+            docker-image: executorch-ubuntu-22.04-clang12
+      fail-fast: false
+    with:
+      runner: ${{ matrix.runner }}
+      docker-image: ${{ matrix.docker-image }}
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 900
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        DTYPE=${{ matrix.dtype }}
+        BUILD_TOOL="cmake"
+        MODE=${{ matrix.mode }}
+        ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}"
+        ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}"
+
+        # Setup executorch
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+        # Install requirements for export_llama
+        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
+        # Test llama2
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}"
+
+  test-llama-runner-macos:
+    name: test-llama-runner-mac
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    strategy:
+      matrix:
+        dtype: [fp32]
+        mode: [mps, coreml, xnnpack+custom+quantize_kv]
       fail-fast: false
     with:
       runner: macos-m1-stable