Skip to content

[Arm tester] Run delegate nodes using tosa_reference_model #7552

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backends/arm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ backends/arm/test/setup_testing.sh
Then you can run the tests with

```
pytest -c /dev/null -v -n auto backends/arm/test --arm_quantize_io --arm_run_corstoneFVP
pytest -c /dev/null -v -n auto backends/arm/test --arm_run_corstoneFVP
```

### Code coverage
Expand Down
51 changes: 0 additions & 51 deletions backends/arm/_passes/tag_io_quant_pass.py

This file was deleted.

14 changes: 14 additions & 0 deletions backends/arm/arm_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,20 @@ def is_tosa(compile_spec: List[CompileSpec]) -> bool:
return False


def is_quantize_io(compile_specs: List[CompileSpec]) -> bool:
    """Report whether the compile specs request quantized IO.

    Args:
        compile_specs: Specs to scan; each is read through its ``key`` and
            ``value`` attributes, and ``value`` is decoded before comparison.

    Returns:
        True if some spec has the key "quantize_io" and a value that decodes
        to the string "True"; False otherwise, including for an empty list.
    """
    # any() stops at the first matching spec, and the value is only decoded
    # for specs whose key already matched.
    return any(
        entry.key == "quantize_io" and entry.value.decode() == "True"
        for entry in compile_specs
    )


def get_tosa_version(compile_spec: List[CompileSpec]) -> TosaSpecification:
    """Build the TOSA specification named by the "tosa_version" compile spec.

    Args:
        compile_spec: Specs to scan; each is read through its ``key`` and
            ``value`` attributes.

    Returns:
        The result of ``TosaSpecification.create_from_string`` applied to the
        decoded value of the first spec whose key is "tosa_version".

    Raises:
        RuntimeError: If no spec carries the "tosa_version" key.
    """
    # Only the first matching entry is used; later duplicates are ignored.
    version_entry = next(
        (entry for entry in compile_spec if entry.key == "tosa_version"),
        None,
    )
    if version_entry is None:
        raise RuntimeError("Could not find TOSA version in CompileSpec")
    return TosaSpecification.create_from_string(version_entry.value.decode())


def get_intermediate_path(compile_spec: List[CompileSpec]) -> Optional[str]:
for spec in compile_spec:
if spec.key == "debug_artifact_path":
Expand Down
63 changes: 48 additions & 15 deletions backends/arm/arm_partitioner.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2023-2024 Arm Limited and/or its affiliates.
# Copyright 2023-2025 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
Expand All @@ -10,8 +10,10 @@
from typing import Callable, final, List, Optional, Tuple

import torch
from executorch.backends.arm.arm_backend import ArmBackend # usort: skip
from executorch.backends.arm._passes.tag_io_quant_pass import TagIOQuantPass
from executorch.backends.arm.arm_backend import (
ArmBackend,
is_quantize_io,
) # usort: skip
from executorch.backends.arm.operator_support.tosa_supported_operators import (
TOSASupportedOperators,
)
Expand All @@ -23,7 +25,7 @@
PartitionResult,
)
from executorch.exir.backend.utils import tag_constant_data
from executorch.exir.passes import PassManager
from executorch.exir.dialects._ops import ops as exir_ops
from torch.export.exported_program import ExportedProgram
from torch.fx.passes.infra.partitioner import CapabilityBasedPartitioner

Expand All @@ -35,6 +37,22 @@
logger.setLevel(logging.INFO)


def is_quant_node(node: torch.fx.node.Node) -> bool:
    """Tell whether ``node`` targets one of the edge-dialect quantize ops.

    The recognised targets are the ``quantized_decomposed`` overloads
    ``quantize_per_channel.default``, ``quantize_per_tensor.default`` and
    ``quantize_per_tensor.tensor``.
    """
    qd_ops = exir_ops.edge.quantized_decomposed
    quantize_targets = {
        qd_ops.quantize_per_channel.default,
        qd_ops.quantize_per_tensor.default,
        qd_ops.quantize_per_tensor.tensor,
    }
    return node.target in quantize_targets


def is_dequant_node(node: torch.fx.node.Node) -> bool:
    """Tell whether ``node`` targets one of the edge-dialect dequantize ops.

    The recognised targets are the ``quantized_decomposed`` overloads
    ``dequantize_per_channel.default``, ``dequantize_per_tensor.default`` and
    ``dequantize_per_tensor.tensor``.
    """
    qd_ops = exir_ops.edge.quantized_decomposed
    dequantize_targets = {
        qd_ops.dequantize_per_channel.default,
        qd_ops.dequantize_per_tensor.default,
        qd_ops.dequantize_per_tensor.tensor,
    }
    return node.target in dequantize_targets


@final
class ArmPartitioner(Partitioner):
def __init__(self, compile_spec: List[CompileSpec]) -> None:
Expand All @@ -43,6 +61,7 @@ def __init__(self, compile_spec: List[CompileSpec]) -> None:
def partition(self, exported_program: ExportedProgram) -> PartitionResult:
# Run the CapabilityBasedPartitioner to return the largest possible
# subgraphs containing the nodes with the tags

logger.info("ArmPartitioner::partition")
partition_tags = {}

Expand All @@ -52,28 +71,42 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:

logger.info(f"Partitioning for {tosa_spec}")

for spec in self.delegation_spec.compile_specs:
if spec.key == "quantize_io" and spec.value.decode() == "True":
# Exclude IO quantization from the partition
passes = PassManager(
passes=[
TagIOQuantPass(),
]
)
passes(exported_program.graph_module)

capability_partitioner = CapabilityBasedPartitioner(
exported_program.graph_module,
TOSASupportedOperators(tosa_spec),
allows_single_node_partition=True,
)
partition_list = capability_partitioner.propose_partitions()
for partition in partition_list:
tag = f"tag{partition.id}"

def is_partitioned(node: torch.fx.Node, tag=tag) -> bool:
return (
"delegation_tag" in node.meta and node.meta["delegation_tag"] == tag
)

for node in partition.nodes:
tag = f"tag{partition.id}"
node.meta["delegation_tag"] = tag
partition_tags[tag] = self.delegation_spec

if not is_quantize_io(self.delegation_spec.compile_specs):
continue

# De-tag the outermost q-nodes (looking upwards) and dq-nodes (looking downwards).
# A node is de-tagged if at least one of its inputs/outputs is not part of the partition.
for node in partition.nodes:
if is_quant_node(node):
for input in node.all_input_nodes:
if not is_partitioned(input):
del node.meta["delegation_tag"]
break

if is_dequant_node(node):
for user in node.users:
if not is_partitioned(user):
del node.meta["delegation_tag"]
break

tag_constant_data(exported_program)

return PartitionResult(
Expand Down
33 changes: 33 additions & 0 deletions backends/arm/scripts/build_quantized_ops_aot_lib.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Copyright 2025 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Configures and builds the quantized_ops_aot_lib target into
# ./cmake-out-aot-lib.
#
# Needs to be run from executorch root.
# Optional parameter: 1: build_type= "Release" | "Debug" | "RelWithDebInfo"

# Stop at the first failing command (and on unset variables) so that a failed
# configure step is never followed by a build of a stale tree.
set -eu

build_type="${1:-Release}"
build_dir="cmake-out-aot-lib"

# distutils was removed in Python 3.12, so ask sysconfig for the pure-Python
# site-packages directory instead of distutils.sysconfig.get_python_lib().
SITE_PACKAGES="$(python3 -c 'import sysconfig; print(sysconfig.get_path("purelib"))')"
CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch"

echo "--------------------------------------------------------------------------------"
echo "Build .so library to register quant ops with AoT flow ${build_type} into '$(pwd)/${build_dir}'"
echo "--------------------------------------------------------------------------------"

# Since we only want to build the quantized_aot lib in the specified folder,
# we want exactly the configuration set below and deleting the cache is OK.
rm -f "${build_dir}/CMakeCache.txt"

cmake \
    -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \
    -DCMAKE_BUILD_TYPE="${build_type}" \
    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED_AOT=ON \
    -B"${build_dir}" \
    .

cmake --build "${build_dir}" --parallel -- quantized_ops_aot_lib
16 changes: 7 additions & 9 deletions backends/arm/test/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
from pathlib import Path

from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder

from executorch.backends.arm.test.conftest import is_option_enabled
from executorch.exir.backend.compile_spec_schema import CompileSpec


Expand Down Expand Up @@ -50,7 +48,6 @@ def maybe_get_tosa_collate_path() -> str | None:
tosa_test_base = os.path.join(tosa_test_base, "tosa-mi")
else:
tosa_test_base = os.path.join(tosa_test_base, "other")

return os.path.join(tosa_test_base, test_class, test_name)

return None
Expand Down Expand Up @@ -78,13 +75,14 @@ def get_tosa_compile_spec_unbuilt(
ArmCompileSpecBuilder()
.tosa_compile_spec(tosa_version)
.dump_intermediate_artifacts_to(custom_path)
.set_quantize_io(True)
)

return compile_spec_builder


def get_u55_compile_spec(
quantize_io=False,
quantize_io=True,
custom_path=None,
reorder_inputs=None,
) -> list[CompileSpec]:
Expand All @@ -99,7 +97,7 @@ def get_u55_compile_spec(


def get_u85_compile_spec(
quantize_io=False,
quantize_io=True,
custom_path=None,
reorder_inputs=None,
) -> list[CompileSpec]:
Expand All @@ -114,7 +112,7 @@ def get_u85_compile_spec(


def get_u55_compile_spec_unbuilt(
quantize_io=False,
quantize_io=True,
custom_path=None,
reorder_inputs=None,
) -> ArmCompileSpecBuilder:
Expand All @@ -132,15 +130,15 @@ def get_u55_compile_spec_unbuilt(
memory_mode="Shared_Sram",
extra_flags="--debug-force-regor --output-format=raw",
)
.set_quantize_io(is_option_enabled("quantize_io") or quantize_io)
.set_quantize_io(quantize_io)
.dump_intermediate_artifacts_to(artifact_path)
.set_input_order(reorder_inputs)
)
return compile_spec


def get_u85_compile_spec_unbuilt(
quantize_io=False,
quantize_io=True,
custom_path=None,
reorder_inputs=None,
) -> list[CompileSpec]:
Expand All @@ -156,7 +154,7 @@ def get_u85_compile_spec_unbuilt(
memory_mode="Shared_Sram",
extra_flags="--output-format=raw",
)
.set_quantize_io(is_option_enabled("quantize_io") or quantize_io)
.set_quantize_io(quantize_io)
.dump_intermediate_artifacts_to(artifact_path)
.set_input_order(reorder_inputs)
)
Expand Down
Loading