We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 370f526 · commit 310b3a3 (copy full SHA for 310b3a3)
examples/models/llama/export_llama_lib.py
@@ -962,6 +962,7 @@ def _get_source_transforms(  # noqa
     if args.expand_rope_table:
         transforms.append(materialze_broadcast_of_rope_freq_cis)
 
+    transforms.append(replace_mha_with_inference_mha)
     if args.use_sdpa_with_kv_cache:
         if is_torchtune_model:
             assert (
examples/models/llama2/source_transformation/torchtune/attention.py renamed to examples/models/llama/source_transformation/torchtune/attention.py
examples/models/llama2/source_transformation/torchtune/modules/mha.py renamed to examples/models/llama/source_transformation/torchtune/modules/mha.py
0 commit comments