Add Llama3.2 CoreML ANE to benchmark (#7399)

guangy10 · Github Executorch · metascroy · web-flow · commit b2a680b8028f · 2024-12-19T17:55:37.000-08:00
* Add Llama3.2 CoreML ANE to benchmark

* Update build_apple_frameworks.sh

* Update ETCoreMLModel.mm

* Update apple-perf.yml

* Update ETCoreMLModel.mm

* Update ETCoreMLModel.mm

* Update build_apple_frameworks.sh

* Update apple-perf.yml

* Auto-upgrade device pool for ANE

---------

Co-authored-by: Github Executorch <github_executorch@arm.com>
Co-authored-by: Scott Roy <161522778+metascroy@users.noreply.github.com>
diff --git a/.ci/scripts/gather_benchmark_configs.py b/.ci/scripts/gather_benchmark_configs.py
@@ -17,6 +17,7 @@
 # Device pools for AWS Device Farm
 DEVICE_POOLS = {
     "apple_iphone_15": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/3b5acd2e-92e2-4778-b651-7726bafe129d",
+    "apple_iphone_15+ios_18": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/12c8b15c-8d03-4e07-950d-0a627e7595b4",
     "samsung_galaxy_s22": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa",
     "samsung_galaxy_s24": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db",
     "google_pixel_8_pro": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a",
@@ -39,8 +40,7 @@
     "ios": [
         "coreml_fp16",
         "mps",
-        # TODO: Add support for llama3 ane
-        # "llama3_coreml_ane",
+        "llama3_coreml_ane",
     ],
 }
 
@@ -198,10 +198,17 @@ def get_benchmark_configs() -> Dict[str, Dict]:
 
         # Add configurations for each valid device
         for device in devices:
-            if device not in DEVICE_POOLS:
-                logging.warning(f"Unsupported device '{device}'. Skipping.")
-                continue
             for config in configs:
+                if config == "llama3_coreml_ane" and not device.endswith("+ios_18"):
+                    device = f"{device}+ios_18"
+                    logging.info(
+                        f"Benchmark config '{config}' only works on iOS 18+, auto-upgraded device pool to '{device}'"
+                    )
+
+                if device not in DEVICE_POOLS:
+                    logging.warning(f"Unsupported device '{device}'. Skipping.")
+                    continue
+
                 record = {
                     "model": model_name,
                     "config": config,
diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml
@@ -261,7 +261,7 @@ jobs:
                 -kv \
                 --disable_dynamic_shape \
                 --coreml \
-                --coreml-ios 17 \
+                --coreml-ios 18 \
                 --coreml-quantize c4w \
                 --coreml-compute-units cpu_and_ne \
                 --output_name="${OUT_ET_MODEL_NAME}.pte"
diff --git a/extension/llm/export/partitioner_lib.py b/extension/llm/export/partitioner_lib.py
@@ -158,7 +158,11 @@ def _validate_ios_version() -> None:
         op_linear_quantizer_config=op_linear_quantizer_config,
     )
 
-    take_over_mutable_buffer = minimum_deployment_target >= ct.target.iOS18
+    # When built with the OSS scripts, ExecuTorch's CoreML delegate runtime is
+    # not built to handle state, so take_over_mutable_buffer is forced to False
+    # even when the target is iOS18. The previous condition was:
+    # take_over_mutable_buffer = minimum_deployment_target >= ct.target.iOS18
+    take_over_mutable_buffer = False
     return CoreMLPartitioner(  # pyre-fixme[16]
         compile_specs=compile_specs,
         take_over_mutable_buffer=take_over_mutable_buffer,