
Commit 385c5e1

Update on "Reduce memory requirement on export_llama tests with no params"
For some reason, after the previous PR in the stack, test_export_llama_lib was OOMing on GitHub Actions CI, and I couldn't figure out why: profiling the test's running memory before and after the PR showed the same usage. This change fixes the CI OOM, and I've been meaning to make it anyway: if we are loading a transformer without params specified, we likely just want to test some basic functionality, so a 1-layer default makes more sense than an 8-layer one. I've confirmed that no other code currently relies on the 8-layer default. Differential Revision: [D75498713](https://our.internmc.facebook.com/intern/diff/D75498713) [ghstack-poisoned]
2 parents 548ef87 + 045dbf1
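As a hedged illustration of the change the message describes (the dataclass below is a simplified stand-in; the real default lives in the llama example's model args, and the surrounding fields are assumptions):

from dataclasses import dataclass

# Simplified sketch, not the actual executorch source: when no params.json
# is supplied, fall back to a tiny 1-layer transformer instead of the old
# 8-layer default, since such a model only smoke-tests export functionality.
@dataclass
class ModelArgs:
    dim: int = 4096
    n_heads: int = 32
    n_layers: int = 1  # previously defaulted to 8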

File tree

4 files changed: +79 -1 lines changed

examples/models/llama/TARGETS

Lines changed: 38 additions & 0 deletions
@@ -82,6 +82,8 @@ runtime.python_binary(
     ],
     deps = [
         ":export_library",
+        ":export_llama_args",
+        ":export_llama_hydra",
         "//caffe2:torch",
         "//executorch/extension/pybindings:aten_lib",
     ],
@@ -148,6 +150,8 @@ runtime.python_library(
         ":source_transformation",
         "//ai_codesign/gen_ai/fast_hadamard_transform:fast_hadamard_transform",
         "//caffe2:torch",
+        "//executorch/examples/models/llama/config:llm_config",
+        "//executorch/examples/models/llama/config:llm_config_utils",
         "//executorch/backends/vulkan/_passes:vulkan_passes",
         "//executorch/exir/passes:init_mutable_pass",
         "//executorch/examples/models:model_base",
@@ -231,6 +235,40 @@ runtime.python_library(
     ],
 )
 
+runtime.python_library(
+    name = "export_llama_args",
+    srcs = [
+        "export_llama_args.py",
+    ],
+    _is_external_target = True,
+    base_module = "executorch.examples.models.llama",
+    visibility = [
+        "//executorch/examples/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        ":export_library",
+    ],
+)
+
+runtime.python_library(
+    name = "export_llama_hydra",
+    srcs = [
+        "export_llama_hydra.py",
+    ],
+    _is_external_target = True,
+    base_module = "executorch.examples.models.llama",
+    visibility = [
+        "//executorch/examples/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        ":export_library",
+        "//executorch/examples/models/llama/config:llm_config",
+        "fbsource//third-party/pypi/hydra-core:hydra-core",
+    ],
+)
+
 runtime.python_test(
     name = "quantized_kv_cache_test",
     srcs = [
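The new export_llama_hydra target pulls in hydra-core plus the llm_config library, pointing at a hydra-driven entry point alongside the plain-args one. A minimal sketch of what such an entry point typically looks like, assuming LlmConfig is the structured config (the function body and registration details are illustrative, not the file's actual contents):

import hydra
from hydra.core.config_store import ConfigStore

from executorch.examples.models.llama.config.llm_config import LlmConfig

# Register LlmConfig as a structured config so hydra validates CLI
# overrides (e.g. base.checkpoint=...) against the dataclass schema.
cs = ConfigStore.instance()
cs.store(name="llm_config", node=LlmConfig)

@hydra.main(version_base=None, config_name="llm_config")
def main(llm_config: LlmConfig) -> None:
    # Illustrative placeholder: the real target would hand the parsed
    # config to the shared export library.
    print(llm_config)

if __name__ == "__main__":
    main()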

examples/models/llama/config/TARGETS

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+# Any targets that should be shared between fbcode and xplat must be defined in
+# targets.bzl. This file can contain fbcode-only targets.
+
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+load(":targets.bzl", "define_common_targets")
+
+oncall("executorch")
+
+define_common_targets()

examples/models/llama/config/llm_config.py

Lines changed: 1 addition & 1 deletion
@@ -218,7 +218,7 @@ class CoreMLConfig:
     enable_state: bool = False
     preserve_sdpa: bool = False
     quantize: Optional[CoreMLQuantize] = None
-    ios: Literal[15, 16, 17, 18] = 15
+    ios: int = 15
     compute_units: CoreMLComputeUnit = CoreMLComputeUnit.CPU_ONLY
 
     def __post_init__(self):
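Relaxing ios from Literal[15, 16, 17, 18] to a plain int is plausibly motivated by omegaconf/hydra structured configs, which cannot represent Literal annotations; the trailing context shows the class already has a __post_init__, the natural home for the same constraint as a runtime check. A hedged sketch of that pattern (the exact check in the real file is an assumption):

from dataclasses import dataclass

@dataclass
class CoreMLConfig:
    ios: int = 15

    def __post_init__(self):
        # With the Literal annotation gone, enforce the supported iOS
        # deployment targets at runtime instead of in the type system.
        if self.ios not in (15, 16, 17, 18):
            raise ValueError(f"Invalid iOS version: {self.ios}")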
examples/models/llama/config/targets.bzl

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+def define_common_targets():
+    runtime.python_library(
+        name = "llm_config",
+        srcs = [
+            "llm_config.py",
+        ],
+        _is_external_target = True,
+        base_module = "executorch.examples.models.llama.config",
+        visibility = [
+            "//executorch/...",
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
+
+    runtime.python_library(
+        name = "llm_config_utils",
+        srcs = [
+            "llm_config_utils.py",
+        ],
+        _is_external_target = True,
+        base_module = "executorch.examples.models.llama.config",
+        visibility = [
+            "//executorch/...",
+            "@EXECUTORCH_CLIENTS",
+        ],
+        deps = [
+            ":llm_config",
+        ],
+    )
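llm_config_utils depends on llm_config, suggesting it hosts helpers that build an LlmConfig from other sources, such as the legacy argparse namespace consumed by export_llama_args. A speculative sketch of that shape (the helper name and field mapping are hypothetical):

import argparse

from executorch.examples.models.llama.config.llm_config import LlmConfig

def convert_args_to_llm_config(args: argparse.Namespace) -> LlmConfig:
    # Hypothetical helper: fold legacy CLI flags into the dataclass so
    # both entry points converge on one LlmConfig downstream.
    llm_config = LlmConfig()
    if getattr(args, "checkpoint", None) is not None:
        # Field path assumed for illustration.
        llm_config.base.checkpoint = args.checkpoint
    return llm_config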
