migrate convert/prepare to torchao #11015

Merged: 3 commits, May 21, 2025
28 changes: 28 additions & 0 deletions .lintrunner.toml
@@ -378,3 +378,31 @@ command = [
'--',
'@{{PATHSFILE}}',
]

[[linter]]
code = "TORCH_AO_IMPORT"
include_patterns = ["**/*.py"]
exclude_patterns = [
"third-party/**",
]

command = [
"python3",
"-m",
"lintrunner_adapters",
"run",
"grep_linter",
"--pattern=\\bfrom torch\\.ao\\.quantization\\.(?:quantize_pt2e)(?:\\.[A-Za-z0-9_]+)*\\b",
"--linter-name=TorchAOImport",
"--error-name=Prohibited torch.ao.quantization import",
"""--error-description=\
Imports from torch.ao.quantization are not allowed. \
Please import from torchao.quantization.pt2e instead.\n \
* torchao.quantization.pt2e (includes all the utils, including observers, fake quants etc.) \n \
* torchao.quantization.pt2e.quantizer (quantizer related objects and utils) \n \
* torchao.quantization.pt2e.quantize_pt2e (prepare_pt2e, prepare_qat_pt2e, convert_pt2e) \n\n \
If you need something from torch.ao.quantization, you can add your file to an exclude_patterns for TORCH_AO_IMPORT in .lintrunner.toml. \
""",
"--",
"@{{PATHSFILE}}",
]
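
For reference, a minimal sketch of the import change this rule enforces, using only module paths that appear elsewhere in this PR (availability of individual symbols may vary with the installed torchao version):

```python
# Flagged by TORCH_AO_IMPORT (matches the prohibited pattern above):
# from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

# Allowed replacements, as used in the files changed by this PR:
from torchao.quantization.pt2e.quantize_pt2e import (
    convert_pt2e,
    prepare_pt2e,
    prepare_qat_pt2e,
)
from torchao.quantization.pt2e.quantizer import QuantizationSpec, Quantizer
from torchao.quantization.pt2e.observer import HistogramObserver, MinMaxObserver
from torchao.quantization.pt2e.graph_utils import find_sequential_partitions
```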
3 changes: 3 additions & 0 deletions .mypy.ini
@@ -97,3 +97,6 @@ ignore_missing_imports = True

[mypy-zstd]
ignore_missing_imports = True

[mypy-torchao.*]
follow_untyped_imports = True
4 changes: 2 additions & 2 deletions backends/apple/coreml/test/test_coreml_quantizer.py
@@ -15,12 +15,12 @@
)

from executorch.backends.apple.coreml.quantizer import CoreMLQuantizer
from torch.ao.quantization.quantize_pt2e import (
from torch.export import export_for_training
from torchao.quantization.pt2e.quantize_pt2e import (
convert_pt2e,
prepare_pt2e,
prepare_qat_pt2e,
)
from torch.export import export_for_training


class TestCoreMLQuantizer:
4 changes: 2 additions & 2 deletions backends/cadence/aot/compiler.py
@@ -37,9 +37,9 @@
from executorch.exir.passes.sym_shape_eval_pass import HintBasedSymShapeEvalPass
from executorch.exir.program._program import to_edge_with_preserved_ops
from torch._inductor.decomposition import remove_decompositions
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

from torch.export.exported_program import ExportedProgram
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

from .passes import get_cadence_passes

@@ -123,7 +123,7 @@ def prepare_and_convert_pt2(
assert isinstance(model_gm, torch.fx.GraphModule)

# Prepare
prepared_model = prepare_pt2e(model_gm, quantizer)
prepared_model = prepare_pt2e(model_gm, quantizer) # pyre-ignore[6]

# Calibrate
# If no calibration data is provided, use the inputs
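
The hunk above shows only the prepare step in context; a minimal sketch of the full prepare → calibrate → convert flow these imports support (function and argument names here are illustrative, not taken from the PR):

```python
import torch
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


def quantize_pt2e_sketch(model_gm: torch.fx.GraphModule, quantizer, calibration_inputs):
    # Prepare: insert observers as directed by the quantizer's annotations
    prepared = prepare_pt2e(model_gm, quantizer)

    # Calibrate: run representative inputs so the observers record value ranges
    for inputs in calibration_inputs:
        prepared(*inputs)

    # Convert: replace observers with quantize/dequantize ops
    return convert_pt2e(prepared)
```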
2 changes: 1 addition & 1 deletion backends/cortex_m/test/test_replace_quant_nodes.py
@@ -17,14 +17,14 @@
)
from executorch.exir.dialects._ops import ops as exir_ops
from torch.ao.quantization.observer import HistogramObserver
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.quantizer import (
QuantizationAnnotation,
QuantizationSpec,
Quantizer,
)
from torch.export import export, export_for_training
from torch.fx import GraphModule
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


@dataclass(eq=True, frozen=True)
2 changes: 1 addition & 1 deletion backends/example/example_partitioner.py
@@ -19,9 +19,9 @@
)
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.graph_module import get_control_flow_submodules
from torch.ao.quantization.pt2e.graph_utils import find_sequential_partitions
from torch.export import ExportedProgram
from torch.fx.passes.operator_support import OperatorSupportBase
from torchao.quantization.pt2e.graph_utils import find_sequential_partitions


@final
11 changes: 7 additions & 4 deletions backends/example/example_quantizer.py
@@ -9,11 +9,14 @@

import torch
from executorch.backends.example.example_operators.ops import module_to_annotator
from executorch.backends.xnnpack.quantizer.xnnpack_quantizer_utils import OperatorConfig
from torch import fx
from torch.ao.quantization.observer import HistogramObserver, MinMaxObserver
from torch.ao.quantization.pt2e.graph_utils import find_sequential_partitions
from torch.ao.quantization.quantizer import QuantizationSpec, Quantizer
from torchao.quantization.pt2e.graph_utils import find_sequential_partitions
from torchao.quantization.pt2e.observer import HistogramObserver, MinMaxObserver
Comment on lines +13 to +14

Contributor: do we want to import from pt2e directly? or do you feel specifying graph_utils / observer are better?

Contributor Author: I like graph_utils/observer personally. But people can use top-level too if they want.

from torchao.quantization.pt2e.quantizer import (
OperatorConfig,
QuantizationSpec,
Quantizer,
)


def get_uint8_tensor_spec(observer_or_fake_quant_ctr):
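
Regarding the review thread above, a small sketch of the two import styles being discussed. The submodule paths are the ones this PR adopts; the top-level form is shown only as the alternative mentioned in the thread and assumes torchao.quantization.pt2e re-exports these symbols, as its error description suggests:

```python
# Submodule style (used throughout this PR):
from torchao.quantization.pt2e.graph_utils import find_sequential_partitions
from torchao.quantization.pt2e.observer import HistogramObserver, MinMaxObserver

# Top-level style ("people can use top-level too if they want"):
# from torchao.quantization.pt2e import find_sequential_partitions, HistogramObserver, MinMaxObserver
```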
4 changes: 2 additions & 2 deletions backends/example/test_example_delegate.py
@@ -17,10 +17,10 @@
DuplicateDequantNodePass,
)
from executorch.exir.delegate import executorch_call_delegate

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.export import export

from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

from torchvision.models.quantization import mobilenet_v2


2 changes: 1 addition & 1 deletion backends/nxp/tests/executorch_pipeline.py
@@ -20,7 +20,7 @@
to_edge_transform_and_lower,
)
from torch import nn
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


def _quantize_model(model, calibration_inputs: list[tuple[torch.Tensor]]):
2 changes: 1 addition & 1 deletion backends/nxp/tests/test_quantizer.py
@@ -8,7 +8,7 @@
import executorch.backends.nxp.tests.models as models
import torch
from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


def _get_target_name(node):
4 changes: 2 additions & 2 deletions backends/qualcomm/tests/utils.py
@@ -44,12 +44,12 @@
from executorch.exir.pass_base import ExportPass
from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
from executorch.exir.program import ExecutorchProgram, ExecutorchProgramManager
from torch.ao.quantization.quantize_pt2e import (
from torch.fx.passes.infra.pass_base import PassResult
from torchao.quantization.pt2e.quantize_pt2e import (
convert_pt2e,
prepare_pt2e,
prepare_qat_pt2e,
)
from torch.fx.passes.infra.pass_base import PassResult


def generate_context_binary(
2 changes: 1 addition & 1 deletion backends/qualcomm/utils/utils.py
@@ -607,8 +607,8 @@ def skip_annotation(
from executorch.backends.qualcomm.serialization.qc_schema_serialize import (
flatbuffer_to_option,
)
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.fx.passes.infra.partitioner import CapabilityBasedPartitioner
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

def prepare_subgm(subgm, subgm_name):
# prepare current submodule for quantization annotation
@@ -15,14 +15,14 @@
get_symmetric_quantization_config,
XNNPACKQuantizer,
)
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

# TODO: Move away from using torch's internal testing utils
from torch.testing._internal.common_quantization import (
NodeSpec as ns,
QuantizationTestCase,
TestHelperModules,
)
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


class MyTestHelperModules:
4 changes: 2 additions & 2 deletions backends/vulkan/test/test_vulkan_delegate.py
@@ -24,11 +24,11 @@
ExecutorchProgramManager,
)

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

from torch.ao.quantization.quantizer import Quantizer
from torch.export import Dim, export, export_for_training, ExportedProgram

from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

ctypes.CDLL("libvulkan.so.1")


4 changes: 2 additions & 2 deletions backends/vulkan/test/test_vulkan_passes.py
@@ -16,10 +16,10 @@
from executorch.exir.backend.canonical_partitioners.config_partitioner import (
format_target_name,
)

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer import Quantizer

from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

###################
## Common Models ##
###################
2 changes: 1 addition & 1 deletion backends/xnnpack/test/ops/test_check_quant_params.py
@@ -9,8 +9,8 @@
)
from executorch.backends.xnnpack.utils.utils import get_param_tensor
from executorch.exir import to_edge_transform_and_lower
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.export import export_for_training
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


class TestCheckQuantParams(unittest.TestCase):
10 changes: 5 additions & 5 deletions backends/xnnpack/test/quantizer/test_pt2e_quantization.py
@@ -32,11 +32,6 @@
weight_observer_range_neg_127_to_127,
)
from torch.ao.quantization.qconfig_mapping import QConfigMapping
from torch.ao.quantization.quantize_pt2e import (
convert_pt2e,
prepare_pt2e,
prepare_qat_pt2e,
)
from torch.ao.quantization.quantizer import Quantizer
from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer
from torch.ao.quantization.quantizer.embedding_quantizer import EmbeddingQuantizer
@@ -51,6 +46,11 @@
TemporaryFileName,
TestCase,
)
from torchao.quantization.pt2e.quantize_pt2e import (
convert_pt2e,
prepare_pt2e,
prepare_qat_pt2e,
)


class TestQuantizePT2E(PT2EQuantizationTestCase):
2 changes: 1 addition & 1 deletion backends/xnnpack/test/quantizer/test_representation.py
@@ -8,7 +8,6 @@
XNNPACKQuantizer,
)
from torch._higher_order_ops.out_dtype import out_dtype # noqa: F401
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer import Quantizer
from torch.export import export_for_training
from torch.testing._internal.common_quantization import (
@@ -17,6 +16,7 @@
skipIfNoQNNPACK,
TestHelperModules,
)
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


@skipIfNoQNNPACK
@@ -28,7 +28,6 @@
convert_to_reference_fx,
prepare_fx,
)
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.export import export_for_training
from torch.testing._internal.common_quantization import (
NodeSpec as ns,
@@ -38,6 +37,7 @@
TestHelperModules,
)
from torch.testing._internal.common_quantized import override_quantized_engine
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


@skipIfNoQNNPACK
4 changes: 2 additions & 2 deletions backends/xnnpack/test/test_xnnpack_utils.py
@@ -70,12 +70,12 @@
_convert_to_reference_decomposed_fx,
prepare_fx,
)

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.export import export_for_training

from torch.testing import FileCheck

from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


def randomize_bn(num_features: int, dimensionality: int = 2) -> torch.nn.Module:
if dimensionality == 1:
10 changes: 5 additions & 5 deletions backends/xnnpack/test/tester/tester.py
@@ -55,15 +55,15 @@
)
from executorch.exir.program._program import _transform
from torch._export.pass_base import PassType
from torch.ao.quantization.quantize_pt2e import (
convert_pt2e,
prepare_pt2e,
prepare_qat_pt2e,
)
from torch.ao.quantization.quantizer.quantizer import Quantizer
from torch.export import export, ExportedProgram
from torch.testing import FileCheck
from torch.utils._pytree import tree_flatten
from torchao.quantization.pt2e.quantize_pt2e import (
convert_pt2e,
prepare_pt2e,
prepare_qat_pt2e,
)


class Stage(ABC):
4 changes: 2 additions & 2 deletions docs/source/backends-arm-ethos-u.md
@@ -33,7 +33,7 @@ from executorch.exir import (
ExecutorchBackendConfig,
to_edge_transform_and_lower,
)
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchvision.models import mobilenetv2

mobilenet_v2 = mobilenetv2.mobilenet_v2(
@@ -98,4 +98,4 @@ Finally, run the elf file on FVP using the script
`executorch/backends/arm/scripts/run_fvp.sh --elf=executorch/mv2_arm_ethos_u55/cmake-out/arm_executor_runner --target=ethos-u55-128`.

## See Also
- [Arm Ethos-U Backend Tutorial](tutorial-arm-ethos-u.md)
- [Arm Ethos-U Backend Tutorial](tutorial-arm-ethos-u.md)
2 changes: 1 addition & 1 deletion docs/source/backends-coreml.md
@@ -104,7 +104,7 @@ import torchvision.models as models
from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
from executorch.backends.apple.coreml.quantizer import CoreMLQuantizer
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
from executorch.exir import to_edge_transform_and_lower
from executorch.backends.apple.coreml.compiler import CoreMLBackend

8 changes: 4 additions & 4 deletions docs/source/backends-xnnpack.md
@@ -1,6 +1,6 @@
# XNNPACK Backend

The XNNPACK delegate is the ExecuTorch solution for CPU execution on mobile CPUs. [XNNPACK](https://github.com/google/XNNPACK/tree/master) is a library that provides optimized kernels for machine learning operators on Arm and x86 CPUs.
The XNNPACK delegate is the ExecuTorch solution for CPU execution on mobile CPUs. [XNNPACK](https://github.com/google/XNNPACK/tree/master) is a library that provides optimized kernels for machine learning operators on Arm and x86 CPUs.

## Features

@@ -18,7 +18,7 @@ The XNNPACK delegate is the ExecuTorch solution for CPU execution on mobile CPUs

## Development Requirements

The XNNPACK delegate does not introduce any development system requirements beyond those required by
The XNNPACK delegate does not introduce any development system requirements beyond those required by
the core ExecuTorch runtime.

----
@@ -63,7 +63,7 @@ After generating the XNNPACK-delegated .pte, the model can be tested from Python

## Quantization

The XNNPACK delegate can also be used as a backend to execute symmetrically quantized models. To quantize a PyTorch model for the XNNPACK backend, use the `XNNPACKQuantizer`. `Quantizers` are backend specific, which means the `XNNPACKQuantizer` is configured to quantize models to leverage the quantized operators offered by the XNNPACK Library.
The XNNPACK delegate can also be used as a backend to execute symmetrically quantized models. To quantize a PyTorch model for the XNNPACK backend, use the `XNNPACKQuantizer`. `Quantizers` are backend specific, which means the `XNNPACKQuantizer` is configured to quantize models to leverage the quantized operators offered by the XNNPACK Library.

### Supported Quantization Schemes
The XNNPACK delegate supports the following quantization schemes:
@@ -94,7 +94,7 @@ from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import XNNPACKQuantizer
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.exir import to_edge_transform_and_lower
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import get_symmetric_quantization_config

model = models.mobilenetv2.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).eval()
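
A minimal sketch of how the updated import fits the flow this page describes, continuing from the mobilenet_v2 snippet above (the set_global configuration and export call follow the surrounding ExecuTorch docs and may differ slightly by release):

```python
import torch
from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
    get_symmetric_quantization_config,
    XNNPACKQuantizer,
)
from torch.export import export_for_training
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

sample_inputs = (torch.randn(1, 3, 224, 224),)

quantizer = XNNPACKQuantizer()
quantizer.set_global(get_symmetric_quantization_config())

training_gm = export_for_training(model, sample_inputs).module()
prepared = prepare_pt2e(training_gm, quantizer)
prepared(*sample_inputs)  # calibrate with representative data
quantized = convert_pt2e(prepared)
```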
2 changes: 1 addition & 1 deletion docs/source/llm/getting-started.md
@@ -619,7 +619,7 @@ from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
get_symmetric_quantization_config,
XNNPACKQuantizer,
)
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
```

```python
2 changes: 1 addition & 1 deletion docs/source/tutorial-xnnpack-delegate-lowering.md
@@ -85,7 +85,7 @@ sample_inputs = (torch.randn(1, 3, 224, 224), )

mobilenet_v2 = export_for_training(mobilenet_v2, sample_inputs).module() # 2-stage export for quantization path

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
get_symmetric_quantization_config,
XNNPACKQuantizer,