We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 879235b · commit 56eb18b (Copy full SHA for 56eb18b)
examples/models/llama/export_llama_lib.py
@@ -719,6 +719,7 @@ def _prepare_for_llama_export(args) -> LLMEdgeManager:
719
preq_mode=args.preq_mode,
720
preq_group_size=args.preq_group_size,
721
preq_embedding_quantize=args.preq_embedding_quantize,
722
+ local_global_attention=args.local_global_attention,
723
)
724
725
@@ -1447,7 +1448,7 @@ def _get_source_transforms( # noqa
1447
1448
transforms.append(
1449
partial(
1450
replace_kv_cache_with_ring_kv_cache,
- layer_sizes=args.local_global_attention,
1451
+ layer_sizes=local_global_attention,
1452
1453
1454
0 commit comments