
Commit 80ff73f

Update base for Update on "Dont quantize the current token for attention"
Differential Revision: [D63497872](https://our.internmc.facebook.com/intern/diff/D63497872/)

[ghstack-poisoned]
2 parents bbf7b76 + 92d1d1e

115 files changed: +3,062 / -2,036 lines


.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-aec9b2ab77389967ef39bb9c10662fd0fe3e185a
+5ba404f68775bb06a1125a100687f86b6d6de6a8

.github/workflows/android-perf.yml

Lines changed: 3 additions & 7 deletions
@@ -176,8 +176,8 @@ jobs:
           fi
           echo "::endgroup::"
 
-  build-llm-demo:
-    name: build-llm-demo
+  build-benchmark-app:
+    name: build-benchmark-app
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     needs: set-parameters
     with:
@@ -211,7 +211,7 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
     needs:
       - set-parameters
-      - build-llm-demo
+      - build-benchmark-app
       - export-models
     strategy:
       matrix:
@@ -228,10 +228,6 @@ jobs:
       # This is the ARN of ExecuTorch project on AWS
      project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
      device-pool-arn: ${{ matrix.device }}
-      # Uploaded to S3 from the previous job, the name of the app comes from the project itself.
-      # Unlike models there are limited numbers of build flavor for apps, and the model controls whether it should build with bpe/tiktoken tokenizer.
-      # It's okay to build all possible apps with all possible flavors in job "build-llm-demo". However, in this job, once a model is given, there is only
-      # one app+flavor that could load and run the model.
      android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug.apk
      android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug-androidTest.apk
      # NB: Need to set the default spec here so that it works for periodic too

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -727,7 +727,7 @@ if(EXECUTORCH_BUILD_PYBIND)
     util PUBLIC ${_common_include_directories} ${TORCH_INCLUDE_DIRS}
   )
   target_compile_options(util PUBLIC ${_pybind_compile_options})
-  target_link_libraries(util PRIVATE torch c10 executorch)
+  target_link_libraries(util PRIVATE torch c10 executorch extension_tensor)
 
   # pybind portable_lib
   pybind11_add_module(portable_lib SHARED extension/pybindings/pybindings.cpp)

backends/apple/mps/setup.md

Lines changed: 2 additions & 2 deletions
@@ -111,12 +111,12 @@ python3 -m examples.apple.mps.scripts.mps_example --model_name="mv3" --no-use_fp
 ```
 
 ### Profiling:
-1. [Optional] Generate an [ETRecord](./sdk-etrecord.rst) while you're exporting your model.
+1. [Optional] Generate an [ETRecord](./etrecord.rst) while you're exporting your model.
 ```bash
 cd executorch
 python3 -m examples.apple.mps.scripts.mps_example --model_name="mv3" --generate_etrecord -b
 ```
-2. Run your Program on the ExecuTorch runtime and generate an [ETDump](./sdk-etdump.md).
+2. Run your Program on the ExecuTorch runtime and generate an [ETDump](./etdump.md).
 ```
 ./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_bundled_fp16.pte --bundled_program --dump-outputs
 ```

backends/arm/test/common.py

Lines changed: 8 additions & 1 deletion
@@ -6,6 +6,7 @@
 
 import logging
 import os
+import platform
 import shutil
 import subprocess
 import sys
@@ -57,11 +58,17 @@ def pytest_collection_modifyitems(config, items):
 
 
 def load_libquantized_ops_aot_lib():
+    so_ext = {
+        "Darwin": "dylib",
+        "Linux": "so",
+        "Windows": "dll",
+    }.get(platform.system(), None)
+
     find_lib_cmd = [
         "find",
         "cmake-out-aot-lib",
         "-name",
-        "libquantized_ops_aot_lib.so",
+        f"libquantized_ops_aot_lib.{so_ext}",
     ]
     res = subprocess.run(find_lib_cmd, capture_output=True)
     if res.returncode == 0:
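For context on the change above: `platform.system()` reports the host OS name, which the new dict uses to pick the shared-library suffix. A minimal standalone sketch of the same idea (the helper name and the `.so` fallback are illustrative, not part of this commit, which falls back to `None`):

```python
import platform


def shared_lib_filename(base: str) -> str:
    """Pick the shared-library suffix for the host OS (hypothetical helper)."""
    ext = {
        "Darwin": "dylib",  # macOS
        "Linux": "so",
        "Windows": "dll",
    }.get(platform.system(), "so")  # illustrative fallback for unknown platforms
    return f"{base}.{ext}"


# e.g. "libquantized_ops_aot_lib.so" on Linux
print(shared_lib_filename("libquantized_ops_aot_lib"))
```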

backends/arm/test/models/test_mobilenet_v2_arm.py

Lines changed: 7 additions & 2 deletions
@@ -100,11 +100,11 @@ def test_mv2_u55_BI(self):
         )
         if common.is_option_enabled("corstone300"):
             tester.run_method_and_compare_outputs(
-                atol=1.0, qtol=1, inputs=self.model_inputs
+                atol=1.0, qtol=1, inputs=self.model_inputs, target_board="corstone-300"
             )
 
     def test_mv2_u85_BI(self):
-        (
+        tester = (
             ArmTester(
                 self.mv2,
                 example_inputs=self.model_inputs,
@@ -116,4 +116,9 @@ def test_mv2_u85_BI(self):
             .check(list(self.operators_after_quantization))
             .partition()
             .to_executorch()
+            .serialize()
         )
+        if common.is_option_enabled("corstone300"):
+            tester.run_method_and_compare_outputs(
+                atol=1.0, qtol=1, inputs=self.model_inputs, target_board="corstone-320"
+            )

backends/arm/test/ops/test_add.py

Lines changed: 16 additions & 4 deletions
@@ -137,16 +137,22 @@ def test_add_u55_BI(self, test_data: torch.Tensor):
             test_data,
         )
         if common.is_option_enabled("corstone300"):
-            tester.run_method_and_compare_outputs(qtol=1, inputs=test_data)
+            tester.run_method_and_compare_outputs(
+                qtol=1, inputs=test_data, target_board="corstone-300"
+            )
 
     @parameterized.expand(Add.test_parameters)
     def test_add_u85_BI(self, test_data: torch.Tensor):
         test_data = (test_data,)
-        self._test_add_ethos_BI_pipeline(
+        tester = self._test_add_ethos_BI_pipeline(
             self.Add(),
             common.get_u85_compile_spec(permute_memory_to_nhwc=True),
             test_data,
         )
+        if common.is_option_enabled("corstone300"):
+            tester.run_method_and_compare_outputs(
+                qtol=1, inputs=test_data, target_board="corstone-320"
+            )
 
     @parameterized.expand(Add2.test_parameters)
     def test_add2_tosa_MI(self, operand1: torch.Tensor, operand2: torch.Tensor):
@@ -165,11 +171,17 @@ def test_add2_u55_BI(self, operand1: torch.Tensor, operand2: torch.Tensor):
             self.Add2(), common.get_u55_compile_spec(), test_data
         )
         if common.is_option_enabled("corstone300"):
-            tester.run_method_and_compare_outputs(qtol=1, inputs=test_data)
+            tester.run_method_and_compare_outputs(
+                qtol=1, inputs=test_data, target_board="corstone-300"
+            )
 
     @parameterized.expand(Add2.test_parameters)
     def test_add2_u85_BI(self, operand1: torch.Tensor, operand2: torch.Tensor):
         test_data = (operand1, operand2)
-        self._test_add_ethos_BI_pipeline(
+        tester = self._test_add_ethos_BI_pipeline(
             self.Add2(), common.get_u55_compile_spec(), test_data
         )
+        if common.is_option_enabled("corstone300"):
+            tester.run_method_and_compare_outputs(
+                qtol=1, inputs=test_data, target_board="corstone-320"
+            )

backends/arm/test/ops/test_conv_combos.py

Lines changed: 4 additions & 0 deletions
@@ -9,6 +9,8 @@
 
 from typing import Tuple
 
+import pytest
+
 import torch
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
@@ -311,6 +313,8 @@ def test_block_bottleneck_residual_tosa_MI(self):
         model = ComboBlockBottleneckResidual()
         self._test_conv_combo_tosa_MI_pipeline(model, model.get_inputs())
 
+    # TODO: Investigate flakyness (MLTORCH-307)
+    @pytest.mark.flaky(reruns=3)
     def test_block_bottleneck_residual_tosa_BI(self):
         model = ComboBlockBottleneckResidual()
         self._test_conv_combo_tosa_BI_pipeline(model, model.get_inputs())
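A `flaky` marker taking a `reruns` argument is what the pytest-rerunfailures plugin provides, which is presumably what this file relies on (the diff itself only imports `pytest`). A minimal sketch of how such a marker behaves, under that assumption:

```python
# Minimal sketch, assuming the pytest-rerunfailures plugin is installed;
# it registers pytest.mark.flaky with a `reruns` (and `reruns_delay`) kwarg.
import random

import pytest


@pytest.mark.flaky(reruns=3)  # retry up to 3 times before reporting a failure
def test_occasionally_noisy():
    # Stand-in for a nondeterministic quantized-model comparison.
    assert random.random() > 0.1
```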

backends/arm/test/ops/test_split.py

Lines changed: 4 additions & 1 deletion
@@ -124,8 +124,11 @@ def test_split_with_sizes_tosa_MI(self, test_data: test_data_t):
         self._test_split_tosa_MI_pipeline(self.SplitWithSizes(), test_data)
 
     @parameterized.expand(Split.test_data)
-    def test_split_n_out_tosa_MI(self, test_data: test_data_t):
+    def test_split_one_out_tosa_MI(self, test_data: test_data_t):
         self._test_split_tosa_MI_pipeline(self.SplitSingleOut(), test_data)
+
+    @parameterized.expand(Split.test_data)
+    def test_split_two_out_tosa_MI(self, test_data: test_data_t):
         self._test_split_tosa_MI_pipeline(self.SplitTwoOut(), test_data)
 
     @parameterized.expand(Split.test_data)
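As background on the split-up tests: `parameterized.expand` (from the `parameterized` package, which the file already uses) generates one test case per data tuple, so each variant above runs once per entry in `Split.test_data`. A self-contained sketch of the mechanism, with hypothetical data:

```python
import unittest

from parameterized import parameterized


class SplitLikeTests(unittest.TestCase):
    # One generated test per tuple; the first element names the case.
    @parameterized.expand([("one_out", 1), ("two_out", 2)])
    def test_split(self, _name: str, num_outputs: int):
        self.assertGreaterEqual(num_outputs, 1)
```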

backends/arm/test/runner_utils.py

Lines changed: 70 additions & 31 deletions
@@ -177,6 +177,7 @@ def __init__(
         self.qp_input: list[QuantizationParams] = None
         self.qp_output: QuantizationParams = None
         self.timeout = 120
+        self.target_board: str = None
 
         self._has_init_run = False
 
@@ -185,11 +186,17 @@ def init_run(
         exported_program: ExportedProgram,
         edge_program: ExportedProgram,
         is_quantized: bool,
+        target_board: str,
     ):
+
+        if target_board not in ["corstone-300", "corstone-320"]:
+            raise RuntimeError(f"Unknown target board: {target_board}")
+
         self.input_names = _get_input_names(edge_program)
         self.output_node = _get_output_node(exported_program)
         self.output_name = self.output_node.name
         self.is_quantized = is_quantized
+        self.target_board = target_board
 
         if is_quantized:
             self.qp_input = _get_input_quantization_params(exported_program)
@@ -205,7 +212,7 @@ def init_run(
     def set_timeout(self, timeout: int):
         self.timeout = timeout
 
-    def run_corstone300(
+    def run_corstone(
         self,
         inputs: Tuple[torch.Tensor],
     ) -> list[torch.Tensor]:
@@ -231,7 +238,7 @@ def run_corstone300(
         )
         elf_path = os.path.join(
             "cmake-out",
-            "arm_semihosting_executor_runner_corstone-300",
+            f"arm_semihosting_executor_runner_{self.target_board}",
             "arm_executor_runner",
         )
         assert os.path.exists(
@@ -242,32 +249,66 @@ def run_corstone300(
         for input_path in input_paths:
             cmd_line += f" -i {input_path}"
 
-        command_args = [
-            "FVP_Corstone_SSE-300_Ethos-U55",
-            "-C",
-            "ethosu.num_macs=128",
-            "-C",
-            "mps3_board.visualisation.disable-visualisation=1",
-            "-C",
-            "mps3_board.telnetterminal0.start_telnet=0",
-            "-C",
-            "mps3_board.uart0.out_file='-'",
-            "-C",
-            "cpu0.CFGITCMSZ=11",
-            "-C",
-            "cpu0.semihosting-enable=1",
-            "-C",
-            "cpu0.semihosting-stack_base=0",
-            "-C",
-            "cpu0.semihosting-heap_limit=0",
-            "-C",
-            f"cpu0.semihosting-cmd_line='{cmd_line}'",
-            "-a",
-            elf_path,
-            "--timelimit",
-            f"{self.timeout}",
-        ]
-        result = _run_cmd(command_args, check=False)
+        command_args = {
+            "corstone-300": [
+                "FVP_Corstone_SSE-300_Ethos-U55",
+                "-C",
+                "ethosu.num_macs=128",
+                "-C",
+                "mps3_board.visualisation.disable-visualisation=1",
+                "-C",
+                "mps3_board.telnetterminal0.start_telnet=0",
+                "-C",
+                "mps3_board.uart0.out_file='-'",
+                "-C",
+                "cpu0.CFGITCMSZ=11",
+                "-C",
+                "cpu0.semihosting-enable=1",
+                "-C",
+                "cpu0.semihosting-stack_base=0",
+                "-C",
+                "cpu0.semihosting-heap_limit=0",
+                "-C",
+                f"cpu0.semihosting-cmd_line='{cmd_line}'",
+                "-a",
+                elf_path,
+                "--timelimit",
+                f"{self.timeout}",
+            ],
+            "corstone-320": [
+                "FVP_Corstone_SSE-320",
+                "-C",
+                "mps4_board.subsystem.ethosu.num_macs=128",
+                "-C",
+                "mps4_board.visualisation.disable-visualisation=1",
+                "-C",
+                "mps4_board.telnetterminal0.start_telnet=0",
+                "-C",
+                "mps4_board.uart0.out_file='-'",
+                "-C",
+                "mps4_board.uart0.unbuffered_output=1",
+                "-C",
+                "mps4_board.uart0.shutdown_on_eot=1",
+                "-C",
+                "mps4_board.subsystem.cpu0.semihosting-enable=1",
+                "-C",
+                "mps4_board.subsystem.cpu0.semihosting-stack_base=0",
+                "-C",
+                "mps4_board.subsystem.cpu0.semihosting-heap_limit=0",
+                "-C",
+                f"mps4_board.subsystem.cpu0.semihosting-cmd_line='{cmd_line}'",
+                "-a",
+                elf_path,
+                "--timelimit",
+                f"{self.timeout}",
+            ],
+        }
+
+        result = _run_cmd(command_args[self.target_board], check=False)
+        if result.returncode != 0:
+            raise RuntimeError(
+                f"Failed to run {command_args[self.target_board]}\nError: {result.stderr.decode()}"
+            )
         result_stdout = result.stdout.decode()
 
         error_regex = r"(^[EF][: ].*$)|(^.*Hard fault.*$)|(^.*Assertion.*$)"
@@ -276,10 +317,8 @@ def run_corstone300(
         # regex to check for error or fault messages in stdout from FVP
         if re.compile(error_regex, re.MULTILINE).search(result_stdout):
             raise RuntimeError(
-                f"Corstone simulation failed, log: \n {result_stdout}\n{result.stderr.decode()}"
+                f"Corstone simulation failed:\ncmd: {command_args[self.target_board]}\n, log: \n {result_stdout}\n{result.stderr.decode()}"
             )
-        elif "E [" in result_stdout:
-            logger.error(result_stdout)
 
         tosa_ref_output = np.fromfile(out_path_with_suffix, dtype=np.float32)
         output_shape = self.output_node.args[0][0].meta["val"].shape
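The rewritten runner keys the full FVP invocation on `self.target_board`. A stripped-down sketch of the same dispatch pattern, as a hypothetical standalone helper (the real code above also wires up semihosting, UART, and MAC-count options):

```python
import subprocess

# Board name -> Fixed Virtual Platform (FVP) binary, mirroring the dict above.
FVP_BINARIES = {
    "corstone-300": "FVP_Corstone_SSE-300_Ethos-U55",
    "corstone-320": "FVP_Corstone_SSE-320",
}


def run_on_fvp(target_board: str, elf_path: str, timeout_s: int = 120) -> str:
    """Run an ELF on the simulator selected by target_board (hypothetical helper)."""
    if target_board not in FVP_BINARIES:
        raise RuntimeError(f"Unknown target board: {target_board}")
    cmd = [FVP_BINARIES[target_board], "-a", elf_path, "--timelimit", str(timeout_s)]
    result = subprocess.run(cmd, capture_output=True, check=False)
    if result.returncode != 0:
        raise RuntimeError(f"Failed to run {cmd}\nError: {result.stderr.decode()}")
    return result.stdout.decode()
```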

backends/arm/test/tester/arm_tester.py

Lines changed: 8 additions & 2 deletions
@@ -98,7 +98,7 @@ def __init__(self, runner_util: RunnerUtil, timeout: int = 1):
         self.runner.set_timeout(timeout)
 
     def run_artifact(self, inputs):
-        return self.runner.run_corstone300(inputs)
+        return self.runner.run_corstone(inputs)
 
     def dump_artifact(self, path_to_dump: Optional[str]):
         if not path_to_dump:
@@ -226,6 +226,7 @@ def run_method_and_compare_outputs(
         self,
         inputs: Optional[Tuple[torch.Tensor]] = None,
         stage: Optional[str] = None,
+        target_board: Optional[str] = "corstone-300",
         num_runs=1,
         atol=1e-03,
         rtol=1e-03,
@@ -260,7 +261,12 @@ def run_method_and_compare_outputs(
         edge_program = self.stages[
             self.stage_name(tester.ToEdge)
         ].artifact.exported_program()
-        self.runner_util.init_run(exported_program, edge_program, is_quantized)
+        self.runner_util.init_run(
+            exported_program,
+            edge_program,
+            is_quantized,
+            target_board,
+        )
 
         if is_quantized:
             reference_stage = self.stages[self.stage_name(tester.Quantize)]
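Because `target_board` defaults to `"corstone-300"`, existing callers keep their behavior; only the Ethos-U85 tests opt into the new FVP. A hedged call-site sketch, where `tester` and `test_data` are assumed to be built as in the test files above:

```python
# `tester` is an ArmTester pipeline ending in .serialize(), and `test_data`
# is a tuple of torch.Tensors, both constructed as in the tests above.
tester.run_method_and_compare_outputs(
    qtol=1,
    inputs=test_data,
    target_board="corstone-320",  # default is "corstone-300"
)
```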

backends/qualcomm/TARGETS

Lines changed: 13 additions & 0 deletions
@@ -1,5 +1,18 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
 load(":targets.bzl", "define_common_targets")
 
 oncall("executorch")
 
 define_common_targets()
+
+runtime.python_library(
+    name = "preprocess",
+    srcs = ["qnn_preprocess.py"],
+    visibility = [
+        "//executorch/backends/qualcomm/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        "//executorch/backends/qualcomm/passes:passes",
+    ],
+)
