
Commit 386bb05

Fix test_llama
ghstack-source-id: 714a076
Pull Request resolved: #11165
1 parent 3a09118 commit 386bb05

2 files changed: +6, -8 lines


backends/arm/test/models/test_llama.py

Lines changed: 3 additions & 1 deletion

@@ -22,6 +22,7 @@
     TosaPipelineMI,
 )
 
+from executorch.examples.models.llama.config.llm_config_utils import convert_args_to_llm_config
 from executorch.examples.models.llama.export_llama_lib import (
     build_args_parser,
     get_llama_model,
@@ -89,8 +90,9 @@ def prepare_model(self):
         ]
         parser = build_args_parser()
         args = parser.parse_args(args)
+        llm_config = convert_args_to_llm_config(args)
 
-        llama_model, llama_inputs, llama_meta = get_llama_model(args)
+        llama_model, llama_inputs, llama_meta = get_llama_model(llm_config)
 
         return llama_model, llama_inputs, llama_meta

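To make the new flow easier to follow, the sketch below shows the calling convention the test now exercises: the argparse Namespace produced by build_args_parser is converted to an LlmConfig via convert_args_to_llm_config before being handed to get_llama_model. The helper name load_llama_for_test and its cli_flags parameter are illustrative only; they are not part of this commit.

from executorch.examples.models.llama.config.llm_config_utils import (
    convert_args_to_llm_config,
)
from executorch.examples.models.llama.export_llama_lib import (
    build_args_parser,
    get_llama_model,
)

def load_llama_for_test(cli_flags):
    # cli_flags: a list of command-line style strings, assembled by the caller
    # (the Arm test builds its own argument list before parsing).
    parser = build_args_parser()
    args = parser.parse_args(cli_flags)
    # Convert the argparse Namespace into the structured LlmConfig.
    llm_config = convert_args_to_llm_config(args)
    # get_llama_model now expects the LlmConfig, not the raw Namespace.
    return get_llama_model(llm_config)
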
examples/models/llama/export_llama_lib.py

Lines changed: 3 additions & 7 deletions

@@ -805,10 +805,6 @@ def _qmode_type(value):
 
 
 def _validate_args(llm_config):
-    """
-    TODO: Combine all the backends under --backend args
-    """
-
     if llm_config.export.max_context_length < llm_config.export.max_seq_length:
         raise ValueError(
             f"max_context_length {llm_config.export.max_context_length} must be >= max_seq_len {llm_config.export.max_seq_length}. max_context_length impacts kv cache size that is used to remember history, while max_seq_length refers to user prompt length. Please use --max_context_length to specify context length."
@@ -1498,9 +1494,9 @@ def _get_source_transforms( # noqa
     return transforms
 
 
-def get_llama_model(args):
-    _validate_args(args)
-    e_mgr = _prepare_for_llama_export(args)
+def get_llama_model(llm_config: LlmConfig):
+    _validate_args(llm_config)
+    e_mgr = _prepare_for_llama_export(llm_config)
     model = (
         e_mgr.model.eval().to(device="cuda")
         if torch.cuda.is_available()

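Design note: after this change, get_llama_model no longer accepts the raw argparse Namespace. Callers such as the Arm test above are expected to convert the parsed arguments with convert_args_to_llm_config first, so that _validate_args can read the typed config fields shown in the hunk above (llm_config.export.max_context_length and llm_config.export.max_seq_length) and _prepare_for_llama_export receives the same LlmConfig object.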