
Commit 34b6b3e

Merge branch 'pytorch:main' into add-profiling-to-xnn-executor-runner-2
2 parents 42a8b9c + eaad7ff

34 files changed: 608 additions, 396 deletions

.github/workflows/pull.yml

Lines changed: 19 additions & 0 deletions
@@ -395,6 +395,25 @@ jobs:
         # Test llama2
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
 
+  test-qnn-models-linux:
+    name: test-qnn-models-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-qnn-sdk
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 180
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # placeholder for running test_qnn_delegate.py, can use matrix such that we can trigger different jobs, refers to test-llama-runner-qnn-linux
+        # reminder: make sure each job runs fast
+
   test-phi-3-mini-runner-linux:
     name: test-phi-3-mini-runner-linux
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main

.lintrunner.toml

Lines changed: 1 addition & 0 deletions
@@ -294,6 +294,7 @@ include_patterns = [
     'build/**/*.py',
     'codegen/**/*.py',
     # 'devtools/**/*.py',
+    'devtools/visualization/**/*.py',
     'docs/**/*.py',
     # 'examples/**/*.py',
     # 'exir/**/*.py',

backends/arm/_passes/fuse_quantized_activation_pass.py

Lines changed: 4 additions & 3 deletions
@@ -19,12 +19,13 @@ def _is_fuseable_quantized_activation(self, node: Node):
         is_fuseable = min_val == 0
 
         is_quantized = len(node.users) == 1 and next(iter(node.users)).target == q_op
-        if is_quantized:
+        if is_fuseable and is_quantized:
             quant_node = next(iter(node.users))
             zp = quant_node.args[2]
             qmin = quant_node.args[3]
-
-            return is_fuseable and is_quantized and zp == qmin
+            return zp == qmin
+        else:
+            return False
 
     def _is_fuseable_input(self, node: Node):
         return (
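The reordered guard is equivalent for callers that treat the result as a boolean (the old implicit None return becomes an explicit False), but it now short-circuits: the quantize node's args are only inspected when the node is both fuseable and quantized. A minimal standalone sketch of the control flow, with stand-in values for the pass's real inputs (min_val, users, q_op, zp, qmin are all hypothetical here):

    def is_fuseable_quantized_activation(min_val, users, q_op, zp, qmin) -> bool:
        is_fuseable = min_val == 0
        is_quantized = len(users) == 1 and users[0] == q_op
        # Check both conditions up front instead of branching on is_quantized
        # alone and folding is_fuseable into the return expression.
        if is_fuseable and is_quantized:
            return zp == qmin
        return False

    # Fuseable requires a zero clamp minimum and a zero point equal to qmin.
    assert is_fuseable_quantized_activation(0, ["q"], "q", zp=-128, qmin=-128)
    assert not is_fuseable_quantized_activation(1, ["q"], "q", zp=-128, qmin=-128)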

backends/arm/arm_backend.py

Lines changed: 1 addition & 30 deletions
@@ -49,8 +49,7 @@ def __init__(self):
         self.compiler_flags = []
         self.output_format = None
         self.path_for_intermediates = None
-        self.quantize_io = False
-        self.tosa_spec = None
+        self.tosa_version = None
         self.input_order = None
 
     def ethosu_compile_spec(
@@ -123,24 +122,6 @@ def dump_intermediate_artifacts_to(
         self.path_for_intermediates = output_path
         return self
 
-    def set_quantize_io(self, quantize_io: bool = False) -> "ArmCompileSpecBuilder":
-        """
-        Quantization of inputs and dequantization of outputs for cases where
-        whole graph is quantized and method signature is not of quantized type.
-        """
-        self.quantize_io = quantize_io
-        return self
-
-    def set_input_order(
-        self, input_order: Optional[str] = None
-    ) -> "ArmCompileSpecBuilder":
-        """
-        Reorder the inputs coming in. This may be required when inputs > 1.
-        And while using the U55/U85 CompileSpec.
-        """
-        self.input_order = input_order
-        return self
-
     def build(self) -> List[CompileSpec]:
         """
         Generate a list of compile spec objects from the builder
@@ -170,9 +151,6 @@ def build(self) -> List[CompileSpec]:
                 )
             )
 
-        if self.quantize_io:
-            self.compile_spec.append(CompileSpec("quantize_io", "True".encode()))
-
         return self.compile_spec
 
 
@@ -183,13 +161,6 @@ def is_tosa(compile_spec: List[CompileSpec]) -> bool:
     return False
 
 
-def is_quantize_io(compile_specs: List[CompileSpec]) -> bool:
-    for spec in compile_specs:
-        if spec.key == "quantize_io" and spec.value.decode() == "True":
-            return True
-    return False
-
-
 def get_tosa_version(compile_spec: List[CompileSpec]) -> TosaSpecification:
     for spec in compile_spec:
         if spec.key == "tosa_version":
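After this removal, a compile spec is assembled from the remaining builder methods only; IO quantization no longer hinges on a "quantize_io" CompileSpec entry. A hedged usage sketch (the TOSA version string and output path are assumed placeholders, not taken from this diff):

    from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder

    compile_spec = (
        ArmCompileSpecBuilder()
        .tosa_compile_spec("TOSA-0.80+BI")           # assumed version string
        .dump_intermediate_artifacts_to("/tmp/arm")  # optional debug artifacts
        .build()
    )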

backends/arm/arm_partitioner.py

Lines changed: 0 additions & 4 deletions
@@ -12,7 +12,6 @@
 import torch
 from executorch.backends.arm.arm_backend import (
     ArmBackend,
-    is_quantize_io,
 )  # usort: skip
 from executorch.backends.arm.operator_support.tosa_supported_operators import (
     TOSASupportedOperators,
@@ -89,9 +88,6 @@ def is_partitioned(node: torch.fx.Node, tag=tag) -> bool:
                 node.meta["delegation_tag"] = tag
             partition_tags[tag] = self.delegation_spec
 
-            if not is_quantize_io(self.delegation_spec.compile_specs):
-                continue
-
             # De-tag outmost q-nodes upwards and dq-nodes downwards.
             # De-tag if at least one input/ output is not part of partition.
             for node in partition.nodes:

backends/arm/test/common.py

Lines changed: 0 additions & 17 deletions
@@ -78,46 +78,35 @@ def get_tosa_compile_spec_unbuilt(
         ArmCompileSpecBuilder()
         .tosa_compile_spec(tosa_spec)
         .dump_intermediate_artifacts_to(custom_path)
-        .set_quantize_io(True)
     )
 
     return compile_spec_builder
 
 
 def get_u55_compile_spec(
-    quantize_io=True,
     custom_path=None,
-    reorder_inputs=None,
 ) -> list[CompileSpec]:
     """
     Default compile spec for Ethos-U55 tests.
     """
     return get_u55_compile_spec_unbuilt(
-        quantize_io=quantize_io,
         custom_path=custom_path,
-        reorder_inputs=reorder_inputs,
     ).build()
 
 
 def get_u85_compile_spec(
-    quantize_io=True,
     custom_path=None,
-    reorder_inputs=None,
 ) -> list[CompileSpec]:
     """
     Default compile spec for Ethos-U85 tests.
     """
     return get_u85_compile_spec_unbuilt(
-        quantize_io=quantize_io,
         custom_path=custom_path,
-        reorder_inputs=reorder_inputs,
     ).build()
 
 
 def get_u55_compile_spec_unbuilt(
-    quantize_io=True,
     custom_path=None,
-    reorder_inputs=None,
 ) -> ArmCompileSpecBuilder:
     """Get the ArmCompileSpecBuilder for the Ethos-U55 tests, to modify
     the compile spec before calling .build() to finalize it.
@@ -133,17 +122,13 @@ def get_u55_compile_spec_unbuilt(
             memory_mode="Shared_Sram",
             extra_flags="--debug-force-regor --output-format=raw",
         )
-        .set_quantize_io(quantize_io)
         .dump_intermediate_artifacts_to(artifact_path)
-        .set_input_order(reorder_inputs)
     )
     return compile_spec
 
 
 def get_u85_compile_spec_unbuilt(
-    quantize_io=True,
     custom_path=None,
-    reorder_inputs=None,
 ) -> list[CompileSpec]:
     """Get the ArmCompileSpecBuilder for the Ethos-U85 tests, to modify
     the compile spec before calling .build() to finalize it.
@@ -157,9 +142,7 @@ def get_u85_compile_spec_unbuilt(
             memory_mode="Shared_Sram",
             extra_flags="--output-format=raw",
         )
-        .set_quantize_io(quantize_io)
         .dump_intermediate_artifacts_to(artifact_path)
-        .set_input_order(reorder_inputs)
     )
     return compile_spec
 
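With quantize_io and reorder_inputs gone, the test helpers take at most a custom_path. A short usage sketch (the path value is illustrative):

    from executorch.backends.arm.test import common

    u55_spec = common.get_u55_compile_spec()  # built list[CompileSpec]
    u85_builder = common.get_u85_compile_spec_unbuilt(
        custom_path="/tmp/arm_artifacts"
    )
    u85_spec = u85_builder.build()  # finalize after any extra tweaks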

backends/arm/test/misc/test_multiple_outputs.py

Lines changed: 46 additions & 1 deletion
@@ -6,9 +6,11 @@
 
 import unittest
 
+import pytest
 import torch
-from executorch.backends.arm.test import common
+from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.exir.backend.compile_spec_schema import CompileSpec
 
 
 class TestMultipleOutputs(unittest.TestCase):
@@ -51,3 +53,46 @@ def test_tosa_BI_pipeline(self):
             .to_executorch()
             .run_method_and_compare_outputs(inputs=inputs, qtol=1.0)
         )
+
+    def _test_ethosu_BI_pipeline(
+        self,
+        module: torch.nn.Module,
+        test_data: tuple[torch.Tensor],
+        compile_spec: CompileSpec,
+    ):
+        tester = (
+            ArmTester(
+                module,
+                example_inputs=test_data,
+                compile_spec=compile_spec,
+            )
+            .quantize()
+            .export()
+            .to_edge_transform_and_lower()
+            .to_executorch()
+            .serialize()
+        )
+        if conftest.is_option_enabled("corstone_fvp"):
+            tester.run_method_and_compare_outputs(qtol=1, inputs=test_data)
+
+    @pytest.mark.corstone_fvp
+    def test_u85_BI(self):
+        module = self.MultipleOutputsModule()
+        test_data = module.get_inputs()
+        self._test_ethosu_BI_pipeline(
+            module,
+            test_data,
+            common.get_u85_compile_spec(),
+        )
+
+    @pytest.mark.corstone_fvp
+    @conftest.expectedFailureOnFVP
+    # TODO MLETORCH-598
+    def test_u55_BI(self):
+        module = self.MultipleOutputsModule()
+        test_data = module.get_inputs()
+        self._test_ethosu_BI_pipeline(
+            module,
+            test_data,
+            common.get_u55_compile_spec(),
+        )
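The module under test is a nested class defined earlier in this file and not shown in the hunk. A hypothetical stand-in that captures the relevant property, namely that forward() returns more than one tensor, which is what the serialized FVP run has to handle:

    import torch

    class MultipleOutputsModule(torch.nn.Module):
        def get_inputs(self) -> tuple[torch.Tensor, torch.Tensor]:
            return (torch.randn(4, 4), torch.randn(4, 4))

        def forward(self, x: torch.Tensor, y: torch.Tensor):
            # Two results make the delegated graph multi-output.
            return x + y, x * y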

backends/arm/test/ops/test_depthwise_conv.py

Lines changed: 10 additions & 20 deletions
@@ -259,58 +259,48 @@ def test_dw_conv_tosa_BI(self, test_name: str, model: torch.nn.Module):
 
     @parameterized.expand(testsuite_conv2d[:4], skip_on_empty=True)
     @pytest.mark.corstone_fvp
-    def test_dw_conv2d_u55_BI(
-        self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = True
-    ):
+    def test_dw_conv2d_u55_BI(self, test_name: str, model: torch.nn.Module):
         self._test_dw_conv_ethos_BI_pipeline(
             model,
-            common.get_u55_compile_spec(quantize_io=set_quantize_io),
+            common.get_u55_compile_spec(),
             model.get_inputs(),
         )
 
     @parameterized.expand(testsuite_conv2d[4:], skip_on_empty=True)
     @pytest.mark.corstone_fvp
     @conftest.expectedFailureOnFVP  # TODO: MLETORCH-516
-    def test_dw_conv2d_u55_BI_xfails(
-        self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = False
-    ):
+    def test_dw_conv2d_u55_BI_xfails(self, test_name: str, model: torch.nn.Module):
         self._test_dw_conv_ethos_BI_pipeline(
             model,
-            common.get_u55_compile_spec(quantize_io=set_quantize_io),
+            common.get_u55_compile_spec(),
             model.get_inputs(),
         )
 
     @parameterized.expand(testsuite_conv1d, skip_on_empty=True)
     @pytest.mark.corstone_fvp
-    def test_dw_conv1d_u55_BI(
-        self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = True
-    ):
+    def test_dw_conv1d_u55_BI(self, test_name: str, model: torch.nn.Module):
         self._test_dw_conv_ethos_BI_pipeline(
             model,
-            common.get_u55_compile_spec(quantize_io=set_quantize_io),
+            common.get_u55_compile_spec(),
             model.get_inputs(),
         )
 
     @parameterized.expand(testsuite_conv1d + testsuite_conv2d_u85)
     @pytest.mark.corstone_fvp
-    def test_dw_conv_u85_BI(
-        self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = True
-    ):
+    def test_dw_conv_u85_BI(self, test_name: str, model: torch.nn.Module):
         self._test_dw_conv_ethos_BI_pipeline(
             model,
-            common.get_u85_compile_spec(quantize_io=set_quantize_io),
+            common.get_u85_compile_spec(),
             model.get_inputs(),
         )
 
     # All test cases except 3x3_1x3x256x256_gp3_st1 have numerical issues on FVP. MLETORCH-520
     @parameterized.expand(testsuite_conv2d_u85_xfails)
     @pytest.mark.corstone_fvp
     @conftest.expectedFailureOnFVP
-    def test_dw_conv_u85_BI_xfails(
-        self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = True
-    ):
+    def test_dw_conv_u85_BI_xfails(self, test_name: str, model: torch.nn.Module):
         self._test_dw_conv_ethos_BI_pipeline(
             model,
-            common.get_u85_compile_spec(quantize_io=set_quantize_io),
+            common.get_u85_compile_spec(),
             model.get_inputs(),
         )
