migrate convert/prepare to torchao #11015

Merged: 3 commits, May 21, 2025
28 changes: 28 additions & 0 deletions .lintrunner.toml
@@ -378,3 +378,31 @@ command = [
'--',
'@{{PATHSFILE}}',
]

[[linter]]
code = "TORCH_AO_IMPORT"
include_patterns = ["**/*.py"]
exclude_patterns = [
"third-party/**",
]

command = [
"python3",
"-m",
"lintrunner_adapters",
"run",
"grep_linter",
"--pattern=\\bfrom torch\\.ao\\.quantization\\.(?:quantize_pt2e)(?:\\.[A-Za-z0-9_]+)*\\b",
"--linter-name=TorchAOImport",
"--error-name=Prohibited torch.ao.quantization import",
"""--error-description=\
Imports from torch.ao.quantization are not allowed. \
Please import from torchao.quantization.pt2e instead.\n \
* torchao.quantization.pt2e (includes all the utils, including observers, fake quants etc.) \n \
* torchao.quantization.pt2e.quantizer (quantizer related objects and utils) \n \
* torchao.quantization.pt2e.quantize_pt2e (prepare_pt2e, prepare_qat_pt2e, convert_pt2e) \n\n \
If you need something from torch.ao.quantization, you can add your file to an exclude_patterns for TORCH_AO_IMPORT in .lintrunner.toml. \
""",
"--",
"@{{PATHSFILE}}",
]
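
For reference, a minimal sketch of the import change this rule enforces, using only module paths that appear elsewhere in this PR (availability of individual symbols may vary with the installed torchao version):

```python
# Flagged by TORCH_AO_IMPORT (matches the prohibited pattern above):
# from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

# Allowed replacements, as used in the files changed by this PR:
from torchao.quantization.pt2e.quantize_pt2e import (
    convert_pt2e,
    prepare_pt2e,
    prepare_qat_pt2e,
)
from torchao.quantization.pt2e.quantizer import QuantizationSpec, Quantizer
from torchao.quantization.pt2e.observer import HistogramObserver, MinMaxObserver
from torchao.quantization.pt2e.graph_utils import find_sequential_partitions
```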
3 changes: 3 additions & 0 deletions .mypy.ini
@@ -97,3 +97,6 @@ ignore_missing_imports = True

[mypy-zstd]
ignore_missing_imports = True

[mypy-torchao.*]
follow_untyped_imports = True
4 changes: 2 additions & 2 deletions backends/apple/coreml/test/test_coreml_quantizer.py
@@ -15,12 +15,12 @@
)

from executorch.backends.apple.coreml.quantizer import CoreMLQuantizer
from torch.ao.quantization.quantize_pt2e import (
from torch.export import export_for_training
from torchao.quantization.pt2e.quantize_pt2e import (
convert_pt2e,
prepare_pt2e,
prepare_qat_pt2e,
)
from torch.export import export_for_training


class TestCoreMLQuantizer:
4 changes: 2 additions & 2 deletions backends/cadence/aot/compiler.py
@@ -37,9 +37,9 @@
from executorch.exir.passes.sym_shape_eval_pass import HintBasedSymShapeEvalPass
from executorch.exir.program._program import to_edge_with_preserved_ops
from torch._inductor.decomposition import remove_decompositions
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

from torch.export.exported_program import ExportedProgram
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

from .passes import get_cadence_passes

@@ -123,7 +123,7 @@ def prepare_and_convert_pt2(
assert isinstance(model_gm, torch.fx.GraphModule)

# Prepare
prepared_model = prepare_pt2e(model_gm, quantizer)
prepared_model = prepare_pt2e(model_gm, quantizer) # pyre-ignore[6]

# Calibrate
# If no calibration data is provided, use the inputs
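
The hunk above shows only the prepare step in context; a minimal sketch of the full prepare → calibrate → convert flow these imports support (function and argument names here are illustrative, not taken from the PR):

```python
import torch
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


def quantize_pt2e_sketch(model_gm: torch.fx.GraphModule, quantizer, calibration_inputs):
    # Prepare: insert observers as directed by the quantizer's annotations
    prepared = prepare_pt2e(model_gm, quantizer)

    # Calibrate: run representative inputs so the observers record value ranges
    for inputs in calibration_inputs:
        prepared(*inputs)

    # Convert: replace observers with quantize/dequantize ops
    return convert_pt2e(prepared)
```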
2 changes: 1 addition & 1 deletion backends/cortex_m/test/test_replace_quant_nodes.py
@@ -17,14 +17,14 @@
)
from executorch.exir.dialects._ops import ops as exir_ops
from torch.ao.quantization.observer import HistogramObserver
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.quantizer import (
QuantizationAnnotation,
QuantizationSpec,
Quantizer,
)
from torch.export import export, export_for_training
from torch.fx import GraphModule
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


@dataclass(eq=True, frozen=True)
2 changes: 1 addition & 1 deletion backends/example/example_partitioner.py
@@ -19,9 +19,9 @@
)
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.graph_module import get_control_flow_submodules
from torch.ao.quantization.pt2e.graph_utils import find_sequential_partitions
from torch.export import ExportedProgram
from torch.fx.passes.operator_support import OperatorSupportBase
from torchao.quantization.pt2e.graph_utils import find_sequential_partitions


@final
11 changes: 7 additions & 4 deletions backends/example/example_quantizer.py
@@ -9,11 +9,14 @@

import torch
from executorch.backends.example.example_operators.ops import module_to_annotator
from executorch.backends.xnnpack.quantizer.xnnpack_quantizer_utils import OperatorConfig
from torch import fx
from torch.ao.quantization.observer import HistogramObserver, MinMaxObserver
from torch.ao.quantization.pt2e.graph_utils import find_sequential_partitions
from torch.ao.quantization.quantizer import QuantizationSpec, Quantizer
from torchao.quantization.pt2e.graph_utils import find_sequential_partitions
from torchao.quantization.pt2e.observer import HistogramObserver, MinMaxObserver
Comment on lines +13 to +14

Contributor: do we want to import from pt2e directly? or do you feel specifying graph_utils / observer are better?

Contributor Author: I like graph_utils/observer personally. But people can use top-level too if they want.

from torchao.quantization.pt2e.quantizer import (
OperatorConfig,
QuantizationSpec,
Quantizer,
)


def get_uint8_tensor_spec(observer_or_fake_quant_ctr):
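
Regarding the review thread above, a small sketch of the two import styles being discussed. The submodule paths are the ones this PR adopts; the top-level form is shown only as the alternative mentioned in the thread and assumes torchao.quantization.pt2e re-exports these symbols, as its error description suggests:

```python
# Submodule style (used throughout this PR):
from torchao.quantization.pt2e.graph_utils import find_sequential_partitions
from torchao.quantization.pt2e.observer import HistogramObserver, MinMaxObserver

# Top-level style ("people can use top-level too if they want"):
# from torchao.quantization.pt2e import find_sequential_partitions, HistogramObserver, MinMaxObserver
```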
4 changes: 2 additions & 2 deletions backends/example/test_example_delegate.py
@@ -17,10 +17,10 @@
DuplicateDequantNodePass,
)
from executorch.exir.delegate import executorch_call_delegate

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.export import export

from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

from torchvision.models.quantization import mobilenet_v2


2 changes: 1 addition & 1 deletion backends/nxp/tests/executorch_pipeline.py
@@ -20,7 +20,7 @@
to_edge_transform_and_lower,
)
from torch import nn
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


def _quantize_model(model, calibration_inputs: list[tuple[torch.Tensor]]):
2 changes: 1 addition & 1 deletion backends/nxp/tests/test_quantizer.py
@@ -8,7 +8,7 @@
import executorch.backends.nxp.tests.models as models
import torch
from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


def _get_target_name(node):
4 changes: 2 additions & 2 deletions backends/qualcomm/tests/utils.py
@@ -44,12 +44,12 @@
from executorch.exir.pass_base import ExportPass
from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
from executorch.exir.program import ExecutorchProgram, ExecutorchProgramManager
from torch.ao.quantization.quantize_pt2e import (
from torch.fx.passes.infra.pass_base import PassResult
from torchao.quantization.pt2e.quantize_pt2e import (
convert_pt2e,
prepare_pt2e,
prepare_qat_pt2e,
)
from torch.fx.passes.infra.pass_base import PassResult


def generate_context_binary(
2 changes: 1 addition & 1 deletion backends/qualcomm/utils/utils.py
@@ -607,8 +607,8 @@ def skip_annotation(
from executorch.backends.qualcomm.serialization.qc_schema_serialize import (
flatbuffer_to_option,
)
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.fx.passes.infra.partitioner import CapabilityBasedPartitioner
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

def prepare_subgm(subgm, subgm_name):
# prepare current submodule for quantization annotation
@@ -15,14 +15,14 @@
get_symmetric_quantization_config,
XNNPACKQuantizer,
)
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

# TODO: Move away from using torch's internal testing utils
from torch.testing._internal.common_quantization import (
NodeSpec as ns,
QuantizationTestCase,
TestHelperModules,
)
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


class MyTestHelperModules:
4 changes: 2 additions & 2 deletions backends/vulkan/test/test_vulkan_delegate.py
@@ -24,11 +24,11 @@
ExecutorchProgramManager,
)

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

from torch.ao.quantization.quantizer import Quantizer
from torch.export import Dim, export, export_for_training, ExportedProgram

from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

ctypes.CDLL("libvulkan.so.1")


4 changes: 2 additions & 2 deletions backends/vulkan/test/test_vulkan_passes.py
@@ -16,10 +16,10 @@
from executorch.exir.backend.canonical_partitioners.config_partitioner import (
format_target_name,
)

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer import Quantizer

from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

###################
## Common Models ##
###################
2 changes: 1 addition & 1 deletion backends/xnnpack/test/ops/test_check_quant_params.py
@@ -9,8 +9,8 @@
)
from executorch.backends.xnnpack.utils.utils import get_param_tensor
from executorch.exir import to_edge_transform_and_lower
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.export import export_for_training
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


class TestCheckQuantParams(unittest.TestCase):
10 changes: 5 additions & 5 deletions backends/xnnpack/test/quantizer/test_pt2e_quantization.py
@@ -32,11 +32,6 @@
weight_observer_range_neg_127_to_127,
)
from torch.ao.quantization.qconfig_mapping import QConfigMapping
from torch.ao.quantization.quantize_pt2e import (
convert_pt2e,
prepare_pt2e,
prepare_qat_pt2e,
)
from torch.ao.quantization.quantizer import Quantizer
from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer
from torch.ao.quantization.quantizer.embedding_quantizer import EmbeddingQuantizer
@@ -51,6 +46,11 @@
TemporaryFileName,
TestCase,
)
from torchao.quantization.pt2e.quantize_pt2e import (
convert_pt2e,
prepare_pt2e,
prepare_qat_pt2e,
)


class TestQuantizePT2E(PT2EQuantizationTestCase):
2 changes: 1 addition & 1 deletion backends/xnnpack/test/quantizer/test_representation.py
@@ -8,7 +8,6 @@
XNNPACKQuantizer,
)
from torch._higher_order_ops.out_dtype import out_dtype # noqa: F401
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer import Quantizer
from torch.export import export_for_training
from torch.testing._internal.common_quantization import (
@@ -17,6 +16,7 @@
skipIfNoQNNPACK,
TestHelperModules,
)
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


@skipIfNoQNNPACK
@@ -28,7 +28,6 @@
convert_to_reference_fx,
prepare_fx,
)
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.export import export_for_training
from torch.testing._internal.common_quantization import (
NodeSpec as ns,
@@ -38,6 +37,7 @@
TestHelperModules,
)
from torch.testing._internal.common_quantized import override_quantized_engine
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


@skipIfNoQNNPACK
4 changes: 2 additions & 2 deletions backends/xnnpack/test/test_xnnpack_utils.py
@@ -70,12 +70,12 @@
_convert_to_reference_decomposed_fx,
prepare_fx,
)

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.export import export_for_training

from torch.testing import FileCheck

from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


def randomize_bn(num_features: int, dimensionality: int = 2) -> torch.nn.Module:
if dimensionality == 1:
10 changes: 5 additions & 5 deletions backends/xnnpack/test/tester/tester.py
@@ -55,15 +55,15 @@
)
from executorch.exir.program._program import _transform
from torch._export.pass_base import PassType
from torch.ao.quantization.quantize_pt2e import (
convert_pt2e,
prepare_pt2e,
prepare_qat_pt2e,
)
from torch.ao.quantization.quantizer.quantizer import Quantizer
from torch.export import export, ExportedProgram
from torch.testing import FileCheck
from torch.utils._pytree import tree_flatten
from torchao.quantization.pt2e.quantize_pt2e import (
convert_pt2e,
prepare_pt2e,
prepare_qat_pt2e,
)


class Stage(ABC):
4 changes: 2 additions & 2 deletions docs/source/backends-arm-ethos-u.md
@@ -33,7 +33,7 @@ from executorch.exir import (
ExecutorchBackendConfig,
to_edge_transform_and_lower,
)
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchvision.models import mobilenetv2

mobilenet_v2 = mobilenetv2.mobilenet_v2(
@@ -98,4 +98,4 @@ Finally, run the elf file on FVP using the script
`executorch/backends/arm/scripts/run_fvp.sh --elf=executorch/mv2_arm_ethos_u55/cmake-out/arm_executor_runner --target=ethos-u55-128`.

## See Also
- [Arm Ethos-U Backend Tutorial](tutorial-arm-ethos-u.md)
- [Arm Ethos-U Backend Tutorial](tutorial-arm-ethos-u.md)
2 changes: 1 addition & 1 deletion docs/source/backends-coreml.md
@@ -104,7 +104,7 @@ import torchvision.models as models
from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
from executorch.backends.apple.coreml.quantizer import CoreMLQuantizer
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
from executorch.exir import to_edge_transform_and_lower
from executorch.backends.apple.coreml.compiler import CoreMLBackend

8 changes: 4 additions & 4 deletions docs/source/backends-xnnpack.md
@@ -1,6 +1,6 @@
# XNNPACK Backend

The XNNPACK delegate is the ExecuTorch solution for CPU execution on mobile CPUs. [XNNPACK](https://github.com/google/XNNPACK/tree/master) is a library that provides optimized kernels for machine learning operators on Arm and x86 CPUs.
The XNNPACK delegate is the ExecuTorch solution for CPU execution on mobile CPUs. [XNNPACK](https://github.com/google/XNNPACK/tree/master) is a library that provides optimized kernels for machine learning operators on Arm and x86 CPUs.

## Features

@@ -18,7 +18,7 @@ The XNNPACK delegate is the ExecuTorch solution for CPU execution on mobile CPUs

## Development Requirements

The XNNPACK delegate does not introduce any development system requirements beyond those required by
The XNNPACK delegate does not introduce any development system requirements beyond those required by
the core ExecuTorch runtime.

----
@@ -63,7 +63,7 @@ After generating the XNNPACK-delegated .pte, the model can be tested from Python

## Quantization

The XNNPACK delegate can also be used as a backend to execute symmetrically quantized models. To quantize a PyTorch model for the XNNPACK backend, use the `XNNPACKQuantizer`. `Quantizers` are backend specific, which means the `XNNPACKQuantizer` is configured to quantize models to leverage the quantized operators offered by the XNNPACK Library.
The XNNPACK delegate can also be used as a backend to execute symmetrically quantized models. To quantize a PyTorch model for the XNNPACK backend, use the `XNNPACKQuantizer`. `Quantizers` are backend specific, which means the `XNNPACKQuantizer` is configured to quantize models to leverage the quantized operators offered by the XNNPACK Library.

### Supported Quantization Schemes
The XNNPACK delegate supports the following quantization schemes:
@@ -94,7 +94,7 @@ from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import XNNPACKQuantizer
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.exir import to_edge_transform_and_lower
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import get_symmetric_quantization_config

model = models.mobilenetv2.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).eval()
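
A minimal sketch of how the updated import fits the flow this page describes, continuing from the mobilenet_v2 snippet above (the set_global configuration and export call follow the surrounding ExecuTorch docs and may differ slightly by release):

```python
import torch
from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
    get_symmetric_quantization_config,
    XNNPACKQuantizer,
)
from torch.export import export_for_training
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

sample_inputs = (torch.randn(1, 3, 224, 224),)

quantizer = XNNPACKQuantizer()
quantizer.set_global(get_symmetric_quantization_config())

training_gm = export_for_training(model, sample_inputs).module()
prepared = prepare_pt2e(training_gm, quantizer)
prepared(*sample_inputs)  # calibrate with representative data
quantized = convert_pt2e(prepared)
```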
2 changes: 1 addition & 1 deletion docs/source/llm/getting-started.md
@@ -619,7 +619,7 @@ from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
get_symmetric_quantization_config,
XNNPACKQuantizer,
)
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
```

```python
2 changes: 1 addition & 1 deletion docs/source/tutorial-xnnpack-delegate-lowering.md
@@ -85,7 +85,7 @@ sample_inputs = (torch.randn(1, 3, 224, 224), )

mobilenet_v2 = export_for_training(mobilenet_v2, sample_inputs).module() # 2-stage export for quantization path

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
get_symmetric_quantization_config,
XNNPACKQuantizer,