Skip to content

Commit 38e1edc

Browse files
committed
Introduce extension/llm/export_llm (#11746)
Summary: Pull Request resolved: #11746 Introduces frontend of export_llm in extension/llm, while keeping most of the code still in examples/models/llama as a first step. Reviewed By: larryliu0820 Differential Revision: D76781745
1 parent 7bd15b9 commit 38e1edc

File tree

7 files changed

+92
-60
lines changed

7 files changed

+92
-60
lines changed

examples/models/llama/TARGETS

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ runtime.python_binary(
8585
":export_library",
8686
"//caffe2:torch",
8787
"//executorch/extension/pybindings:aten_lib",
88+
"//executorch/extension/llm/export:export_llm_lib",
8889
],
8990
)
9091

@@ -133,8 +134,6 @@ runtime.python_library(
133134
name = "export_library",
134135
srcs = [
135136
"export_llama.py",
136-
"export_llama_args.py",
137-
"export_llama_hydra.py",
138137
"export_llama_lib.py",
139138
"model.py",
140139
],

examples/models/llama/config/llm_config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ class BaseConfig:
8686
checkpoint_dir: Optional[str] = None
8787
tokenizer_path: Optional[str] = None
8888
metadata: Optional[str] = None
89-
use_lora: int = int
89+
use_lora: int = 0
9090
fairseq2: bool = False
9191
preq_mode: Optional[PreqMode] = None
9292
preq_group_size: int = 32
@@ -214,7 +214,7 @@ class ExportConfig:
214214

215215
max_seq_length: int = 128
216216
max_context_length: int = 128
217-
output_dir: Optional[str] = None
217+
output_dir: str = "."
218218
output_name: Optional[str] = None
219219
so_library: Optional[str] = None
220220
export_only: bool = False

examples/models/llama/export_llama.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717

1818
import torch
1919

20+
from executorch.examples.models.llama.export_llama_lib import (
21+
build_args_parser,
22+
export_llama,
23+
)
24+
2025
sys.setrecursionlimit(4096)
2126

2227

@@ -39,15 +44,12 @@ def main() -> None:
3944
sys.argv = [arg for arg in sys.argv if arg != "--hydra"]
4045
print(f"running with {sys.argv}")
4146
runpy.run_module(
42-
"executorch.examples.models.llama.export_llama_hydra", run_name="__main__"
47+
"executorch.extension.llm.export.export_llm", run_name="__main__"
4348
)
4449
else:
45-
# Use the legacy version of the export_llama script which uses argsparse.
46-
from executorch.examples.models.llama.export_llama_args import (
47-
main as export_llama_args_main,
48-
)
49-
50-
export_llama_args_main(remaining_args)
50+
parser = build_args_parser()
51+
remaining_args = parser.parse_args(remaining_args)
52+
export_llama(remaining_args)
5153

5254

5355
if __name__ == "__main__":

examples/models/llama/export_llama_args.py

Lines changed: 0 additions & 21 deletions
This file was deleted.

examples/models/llama/export_llama_hydra.py

Lines changed: 0 additions & 28 deletions
This file was deleted.

extension/llm/export/TARGETS

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,41 @@ runtime.python_library(
4747
],
4848
)
4949

50+
runtime.python_binary(
51+
name = "export_llm",
52+
srcs = [
53+
"export_llm.py",
54+
],
55+
main_function = "executorch.extension.llm.export.export_llm.main",
56+
preload_deps = [
57+
"//executorch/extension/llm/custom_ops:model_sharding_py",
58+
"//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
59+
"//executorch/kernels/quantized:aot_lib",
60+
],
61+
deps = [
62+
"fbsource//third-party/pypi/hydra-core:hydra-core",
63+
"fbsource//third-party/pypi/omegaconf:omegaconf",
64+
"//executorch/examples/models/llama:export_library",
65+
"//executorch/extension/pybindings:aten_lib",
66+
],
67+
)
68+
69+
runtime.python_library(
70+
name = "export_llm_lib",
71+
srcs = [
72+
"export_llm.py",
73+
],
74+
deps = [
75+
"fbsource//third-party/pypi/hydra-core:hydra-core",
76+
"fbsource//third-party/pypi/omegaconf:omegaconf",
77+
"//executorch/examples/models/llama:export_library",
78+
],
79+
visibility = [
80+
"//executorch/examples/...",
81+
"//executorch/extension/llm/...",
82+
],
83+
)
84+
5085
runtime.python_test(
5186
name = "export_passes_test",
5287
srcs = [

extension/llm/export/export_llm.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Export an LLM with ExecuTorch. Currently follows the following steps:
1. Instantiate our custom PyTorch transformer definition from examples/models/llama/llama_transformer.py.
2. Load weights into the model.
3. Apply source transformations/TorchAO quantization.
4. Export model to intermediate IRs.
5. Graph transformations/PT2E quantization.
6. Partition graph and delegate to backend(s).
7. Export to final ExecuTorch .pte format.

Example usage using full CLI arguments:
python -m extension.llm.export.export_llm \
    base.model_class="llama3" \
    model.use_sdpa_with_kv_cache=True \
    model.use_kv_cache=True \
    debug.verbose=True \
    backend.xnnpack.enabled=True \
    backend.xnnpack.extended_ops=True \
    quantization.qmode="8da4w"
"""

import hydra

from executorch.examples.models.llama.config.llm_config import LlmConfig
from executorch.examples.models.llama.export_llama_lib import export_llama
from hydra.core.config_store import ConfigStore
from omegaconf import OmegaConf

# Register the structured LlmConfig dataclass with hydra so that
# `config_name="llm_config"` below resolves to it and CLI overrides
# (e.g. `base.model_class=...`) are schema-validated against it.
cs = ConfigStore.instance()
cs.store(name="llm_config", node=LlmConfig)


@hydra.main(version_base=None, config_path=None, config_name="llm_config")
def main(llm_config: LlmConfig) -> None:
    """Hydra CLI entry point: build the config from CLI overrides and export.

    `OmegaConf.to_object` converts the DictConfig hydra passes in back into a
    real `LlmConfig` dataclass instance, which `export_llama` expects.
    """
    export_llama(OmegaConf.to_object(llm_config))


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)