
Commit 37924ca

jackzhxng authored and kirklandsign committed
Add export_llama performance regression test using expected ops (#9158)
### Summary

Add a proxy for an `export_llama` performance regression test by comparing the ops in the graph before and after the PR. The export happens without loading a checkpoint or params file, which means that all of the base `ModelArgs` values for `llama_transformer` will be used.

### Test plan

N/A
1 parent 0b8df63 commit 37924ca

4 files changed: +51 −2 lines changed
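The summary frames the test as comparing the ops in the exported graph before and after a change. As a minimal sketch of that comparison idea, separate from the commit itself (the op names and counts below are made-up illustrative values; the new test below collects the real ones via `get_delegation_info`):

```python
from collections import Counter

# Made-up illustrative histograms of ops in the exported graph at two commits.
ops_before = Counter({"aten_mm_default": 64, "aten_permute_copy_default": 32})
ops_after = Counter({"aten_mm_default": 64, "aten_permute_copy_default": 40})

# Any op whose count grew between the two exports is a potential
# performance regression worth flagging.
regressions = {
    op: (ops_before[op], count)
    for op, count in ops_after.items()
    if count > ops_before[op]
}
print(regressions)  # {'aten_permute_copy_default': (32, 40)}
```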

examples/models/llama/model.py

Lines changed: 1 addition & 1 deletion
@@ -178,7 +178,7 @@ def __init__(self, **kwargs):
         if checkpoint:
             self.model_.checkpoint_dtype = get_checkpoint_dtype(checkpoint)
         else:
-            self.model_.checkpoint_dtype = None
+            self.model_.checkpoint_dtype = torch.float32
 
         if "int8" in str(checkpoint_path):
             print("Using int8 weight-only quantization!")

examples/models/llama/model_args.py

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 @dataclass
 class ModelArgs:
     dim: int = 4096
-    n_layers: int = 32
+    n_layers: int = 8
     n_heads: int = 32
     n_kv_heads: Optional[int] = None
     vocab_size: int = 512  # Arbitrary value, should be defined later by tokenizer.
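With the lower `n_layers` default, an export that loads no params file now describes a much smaller model. A minimal sketch of what the new defaults mean for code that constructs `ModelArgs` directly, assuming the module path mirrors the import style used by the new test (the exact path is an assumption):

```python
# Assumed import path, following the style of the other imports in this commit.
from executorch.examples.models.llama.model_args import ModelArgs

# Without a checkpoint or params file, export_llama falls back to these
# dataclass defaults, so the exported graph describes an 8-layer transformer
# rather than the previous 32-layer default.
args = ModelArgs()
assert args.n_layers == 8
assert args.dim == 4096 and args.n_heads == 32
assert args.vocab_size == 512  # placeholder value until a tokenizer defines it
```

This is also why the llava model below now pins `n_layers=32` explicitly: it previously relied on the old default.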
New test file (path not shown in this view)

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+from executorch.devtools.backend_debug import get_delegation_info
+from executorch.examples.models.llama.export_llama_lib import (
+    _export_llama,
+    build_args_parser,
+)
+
+UNWANTED_OPS = [
+    "aten_permute_copy_default",
+    "aten_transpose_copy_default",
+]
+
+
+class ExportLlamaLibTest(unittest.TestCase):
+    def test_has_expected_ops_and_op_counts(self):
+        """
+        Checks the presence of unwanted expensive ops.
+
+        Serves as a proxy for a performance regression test, as performance
+        is directly tied to which and how many of each ops are in the graph.
+
+        If this test breaks, please ensure that the difference in ops
+        is intentional before updating the expected ops.
+        """
+        # Since we aren't loading a checkpoint, it doesn't
+        # matter what model we specify. Note that
+        # we cannot test quantization args in this way
+        # since quantization requires promoting meta tensors
+        # to device=cpu, which requires real weights.
+        parser = build_args_parser()
+        args = parser.parse_args([])
+        args.use_sdpa_with_kv_cache = True
+        args.use_kv_cache = True
+        args.verbose = True
+
+        builder = _export_llama(args)
+        graph_module = builder.edge_manager.exported_program().graph_module
+        delegation_info = get_delegation_info(graph_module)
+
+        for op, _op_info in delegation_info.delegation_by_operator.items():
+            self.assertTrue(op not in UNWANTED_OPS)
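The test above only asserts the absence of specific unwanted ops. If exact op counts are ever needed as a tighter regression signal, one option (a sketch only, not part of this commit) is to build a histogram directly from the torch.fx graph; note that the string form of node targets may differ from the edge-dialect names used in `UNWANTED_OPS`:

```python
from collections import Counter


def op_histogram(graph_module):
    """Count call_function nodes in a torch.fx GraphModule by target name.

    Hypothetical helper, not part of the commit; the test above instead
    reads get_delegation_info(...).delegation_by_operator.
    """
    counts = Counter()
    for node in graph_module.graph.nodes:
        if node.op == "call_function":
            counts[str(node.target)] += 1
    return counts


# Possible usage inside a test like the one above (the exact key string
# depends on how the op prints in the graph being inspected):
#   histogram = op_histogram(builder.edge_manager.exported_program().graph_module)
#   self.assertEqual(histogram.get("aten.permute_copy.default", 0), 0)
```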

examples/models/llava/model.py

Lines changed: 1 addition & 0 deletions
@@ -56,6 +56,7 @@ def __init__(
         )
         self.text_model_args = ModelArgs(
             use_kv_cache=True,
+            n_layers=32,
             vocab_size=self.model_.config.text_config.vocab_size,
             hidden_dim=self.model_.config.text_config.intermediate_size,
             max_batch_size=1,  # doesn't work with default batch size 32
