
Commit 97ec69c

Update on "Use llm_config instead of args in export_llama functions"

Differential Revision: [D75484927](https://our.internmc.facebook.com/intern/diff/D75484927) [ghstack-poisoned]

Merge commit 97ec69c, 2 parents: 45571eb + 5859561

3 files changed: +5 −17 lines

examples/models/llama/model.py

Lines changed: 3 additions & 7 deletions

```diff
@@ -8,7 +8,7 @@
 
 import json
 import os
-from typing import Dict, Tuple
+from typing import Dict, Optional, Tuple
 
 import torch
 from executorch.examples.models.checkpoint import (
@@ -37,17 +37,13 @@ def convert_to_llama_checkpoint(**kwargs):
 
 
 class Llama2Model(EagerModelBase):
-    def __init__(self, llm_config: LlmConfig):
+    def __init__(self, llm_config: Optional[LlmConfig] = None):
         resource_dir = get_default_model_resource_dir(__file__)
 
-        self.llm_config = llm_config
+        self.llm_config = llm_config if llm_config else LlmConfig()
 
-        # Use single checkpoint file.
         checkpoint_path = self.llm_config.base.checkpoint
-        # Check if checkpoint_dir was provided for a sharded checkpoint.
         checkpoint_dir = self.llm_config.base.checkpoint_dir
-
-        # Params file.
         params_path = self.llm_config.base.params
 
         self.use_kv_cache = self.llm_config.model.use_kv_cache
```
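With `llm_config` now optional, `Llama2Model` can be constructed with an all-defaults `LlmConfig` or an explicit one. A minimal sketch of both call styles; the `Llama2Model` import path is assumed from the file's location, and the checkpoint path is a hypothetical placeholder:

```python
from executorch.examples.models.llama.config.llm_config import LlmConfig
from executorch.examples.models.llama.model import Llama2Model  # assumed import path

# All-defaults construction: __init__ now falls back to LlmConfig()
# when no config is passed.
model = Llama2Model()

# Explicit construction, setting the fields __init__ reads above.
llm_config = LlmConfig()
llm_config.base.checkpoint = "llama2.pth"  # hypothetical checkpoint path
llm_config.base.params = "params.json"
llm_config.model.use_kv_cache = True
model = Llama2Model(llm_config=llm_config)
```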

examples/models/llama/tests/test_export_llama_lib.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -48,7 +48,7 @@ def test_has_expected_ops_and_op_counts(self):
         args.use_kv_cache = True
         args.verbose = True
 
-        builder = _export_llama(llm_config, args)
+        builder = _export_llama(llm_config)
         graph_module = builder.edge_manager.exported_program().graph_module
         delegation_info = get_delegation_info(graph_module)
 
```
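`_export_llama` now takes the `LlmConfig` alone; the legacy `args` namespace is no longer threaded through. A hedged sketch of the new call shape, assuming `_export_llama` is importable from `export_llama_lib` as the test module's name suggests:

```python
from executorch.examples.models.llama.config.llm_config import LlmConfig
from executorch.examples.models.llama.export_llama_lib import _export_llama  # assumed import path

llm_config = LlmConfig()
llm_config.model.use_kv_cache = True  # mirrors args.use_kv_cache in the old test body

# Single-argument form introduced by this commit.
builder = _export_llama(llm_config)
graph_module = builder.edge_manager.exported_program().graph_module
```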
examples/models/llava/export_llava.py

Lines changed: 1 addition & 9 deletions

```diff
@@ -17,11 +17,7 @@
     XNNPACKQuantizer,
 )
 from executorch.examples.models.llama.config.llm_config import LlmConfig
-from executorch.examples.models.llama.config.llm_config_utils import (
-    convert_args_to_llm_config,
-)
 from executorch.examples.models.llama.export_llama_lib import (
-    build_args_parser,
     get_quantizer_and_quant_params,
 )
 from executorch.examples.models.llama.source_transformation.custom_kv_cache import (
@@ -96,12 +92,8 @@ def forward(self, input_pos, embeddings):
         dynamic_shapes=dynamic_shapes,
     )
 
-    # (Legacy) parse args then convert to LlmConfig.
-    parser = build_args_parser()
-    args = parser.parse_args()
-    llm_config = convert_args_to_llm_config(args)
-
     # Manually set some LlmConfig options.
+    llm_config = LlmConfig()
     llm_config.base.params = "params.json"
     llm_config.backend.xnnpack.enabled = True
     llm_config.quantization.qmode = "8da4w"
```
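The llava exporter now builds its config programmatically instead of round-tripping through `build_args_parser` and `convert_args_to_llm_config`. A sketch of the resulting pattern, restricted to the fields this diff actually sets:

```python
from executorch.examples.models.llama.config.llm_config import LlmConfig

# Construct the config directly; no argparse round-trip.
llm_config = LlmConfig()
llm_config.base.params = "params.json"      # model hyperparameters file
llm_config.backend.xnnpack.enabled = True   # delegate to the XNNPACK backend
llm_config.quantization.qmode = "8da4w"     # 8-bit dynamic activations, 4-bit weights
```

One consequence of dropping the parser is that `export_llava.py` no longer picks up CLI overrides; every option this script needs must now be set on `llm_config` explicitly.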
