
Commit a91eb31

Di Xu (SWE) authored and facebook-github-bot committed

Add support to export XNNPACK based static_llama

Summary: Add support to export the XNNPACK-based static_llama. static_llama is the QNN-backend hybrid/prefill+decode model that takes the KV cache as an explicit inference input: https://www.internalfb.com/code/fbsource/fbcode/executorch/examples/qualcomm/oss_scripts/llama2/model/static_llama.py

Differential Revision: D67867190

1 parent 68c0208 · commit a91eb31
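
For readers unfamiliar with the model, here is a minimal sketch of what "KV cache as an inference input" means for a static-shape attention layer: the caller owns the cache tensors and passes them in on every prefill/decode call. All names, shapes, and the functional cache-update style below are illustrative assumptions; the actual definition lives in the static_llama.py path linked above.

import torch
import torch.nn as nn


class StaticAttentionStub(nn.Module):
    # Toy single-head attention layer: the caller owns the KV cache and
    # passes it in as an explicit input, in the spirit of a
    # static_llama-style prefill/decode model. Scaling and causal
    # masking are omitted for brevity.
    def __init__(self, dim: int = 64):
        super().__init__()
        self.wq = nn.Linear(dim, dim, bias=False)
        self.wk = nn.Linear(dim, dim, bias=False)
        self.wv = nn.Linear(dim, dim, bias=False)

    def forward(self, x, k_cache, v_cache, input_pos):
        # x: [batch, new_tokens, dim]; caches: [batch, max_seq_len, dim];
        # input_pos: 1-D tensor of cache slots for the new tokens.
        q, k, v = self.wq(x), self.wk(x), self.wv(x)
        # Functional cache update (no in-place mutation), which keeps an
        # exported graph free of side effects; the updated caches are
        # returned to the caller for the next step.
        k_cache = torch.index_copy(k_cache, 1, input_pos, k)
        v_cache = torch.index_copy(v_cache, 1, input_pos, v)
        attn = torch.softmax(q @ k_cache.transpose(1, 2), dim=-1)
        return attn @ v_cache, k_cache, v_cache


# Decode step: one new token, caches carried across calls by the caller.
layer = StaticAttentionStub()
k0 = torch.zeros(1, 128, 64)
v0 = torch.zeros(1, 128, 64)
out, k1, v1 = layer(torch.randn(1, 1, 64), k0, v0, torch.tensor([0]))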

File tree

1 file changed: +2 −1 lines changed

examples/models/llama/export_llama_lib.py

Lines changed: 2 additions & 1 deletion
@@ -79,7 +79,7 @@
 verbosity_setting = None


-EXECUTORCH_DEFINED_MODELS = ["stories110m", "llama2", "llama3", "llama3_1", "llama3_2"]
+EXECUTORCH_DEFINED_MODELS = ["stories110m", "llama2", "llama3", "llama3_1", "llama3_2", "static_llama"]
 TORCHTUNE_DEFINED_MODELS = ["llama3_2_vision"]


@@ -649,6 +649,7 @@ def _validate_args(args):
     )


+# TODO: export static_llama via XNNPACK
 def _export_llama(args) -> LLMEdgeManager:  # noqa: C901
     _validate_args(args)
     pt2e_quant_params, quantizers, quant_dtype = get_quantizer_and_quant_params(args)
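
As a usage sketch: registry lists like EXECUTORCH_DEFINED_MODELS typically feed the export script's CLI model selection, so the one-line addition is what makes "static_llama" a selectable target. The parser wiring below is an assumption for illustration, not the exact code in export_llama_lib.py.

import argparse

EXECUTORCH_DEFINED_MODELS = [
    "stories110m", "llama2", "llama3", "llama3_1", "llama3_2", "static_llama"
]
TORCHTUNE_DEFINED_MODELS = ["llama3_2_vision"]


def build_parser() -> argparse.ArgumentParser:
    # Hypothetical wiring: restricting --model choices to the registry
    # lists means adding "static_llama" above is all it takes to make
    # the model selectable from the export entry point.
    parser = argparse.ArgumentParser(description="Export a llama model")
    parser.add_argument(
        "--model",
        choices=EXECUTORCH_DEFINED_MODELS + TORCHTUNE_DEFINED_MODELS,
        required=True,
        help="Which registered model definition to export.",
    )
    return parser


args = build_parser().parse_args(["--model", "static_llama"])
print(args.model)  # -> static_llama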
