
Commit 03429b6

mcr229 authored and facebook-github-bot committed
remove short-term quant
Differential Revision: D48761858
fbshipit-source-id: 374766e873a9c48f5fe63851d2c603681fe0db6c
1 parent a07832d commit 03429b6
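
This commit deletes the short-term FX-graph-mode quantization path (the Quantize stage built on prepare_fx / _convert_to_reference_decomposed_fx) from the XNNPACK Tester, promotes the PT2E-based Quantize2 stage to be the Quantize stage, and renames the affected tests. For context, here is a minimal standalone sketch of the PT2E flow the surviving stage wraps; the model, inputs, and capture call below are illustrative (the capture API in particular varies across PyTorch versions), not code from this commit:

import torch
from torch._export import capture_pre_autograd_graph
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

# Illustrative model and inputs, not taken from this commit.
model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU()).eval()
inputs = (torch.randn(1, 3, 32, 32),)

# Symmetric config mirrors the XNNPACK quantizer imports kept in tester.py.
quantizer = XNNPACKQuantizer()
quantizer.set_global(get_symmetric_quantization_config())

captured = capture_pre_autograd_graph(model, inputs)  # pre-autograd ATen graph
prepared = prepare_pt2e(captured, quantizer)  # insert observers
prepared(*inputs)  # calibrate on sample inputs
converted = convert_pt2e(prepared)  # replace observers with quantize/dequantize ops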

File tree

backends/xnnpack/test/models/mobilenet_v2.py
backends/xnnpack/test/models/mobilenet_v3.py
backends/xnnpack/test/ops/add.py
backends/xnnpack/test/tester/tester.py

4 files changed: +11 -58 lines changed

backends/xnnpack/test/models/mobilenet_v2.py

Lines changed: 4 additions & 4 deletions
@@ -17,7 +17,7 @@
 from torchvision.models.mobilenetv2 import MobileNet_V2_Weights


-class TestXNNPACKMobileNetV2(unittest.TestCase):
+class TestMobileNetV2(unittest.TestCase):
     mv2 = models.mobilenetv2.mobilenet_v2(weights=MobileNet_V2_Weights)
     mv2 = mv2.eval()
     model_inputs = (torch.ones(1, 3, 224, 244),)
@@ -32,7 +32,7 @@ class TestXNNPACKMobileNetV2(unittest.TestCase):
         "executorch_exir_dialects_edge__ops_aten_convolution_default",
     }

-    def test_mv2_fp32(self):
+    def test_fp32_mv2(self):

         (
             Tester(self.mv2, self.model_inputs)
@@ -48,15 +48,15 @@ def test_mv2_fp32(self):
             .compare_outputs()
         )

-    def test_mv2_qs8_pt2e(self):
+    def test_qs8_mv2(self):
         # Quantization fuses away batchnorm, so it is no longer in the graph
         ops_after_quantization = self.all_operators - {
             "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
         }

         (
             Tester(self.mv2, self.model_inputs)
-            .quantize2()
+            .quantize()
             .export(Export(CaptureConfig(enable_aot=True)))
             .to_edge()
             .check(list(ops_after_quantization))

backends/xnnpack/test/models/mobilenet_v3.py

Lines changed: 4 additions & 4 deletions
@@ -16,7 +16,7 @@
 from executorch.exir import CaptureConfig


-class TestXNNPACKMobileNetV3(unittest.TestCase):
+class TestMobileNetV3(unittest.TestCase):
     mv3 = models.mobilenetv3.mobilenet_v3_small(pretrained=True)
     mv3 = mv3.eval()
     model_inputs = (torch.ones(1, 3, 224, 244),)
@@ -35,7 +35,7 @@ class TestXNNPACKMobileNetV3(unittest.TestCase):
         "executorch_exir_dialects_edge__ops_aten_mean_dim",
     }

-    def test_mv3_fp32(self):
+    def test_fp32_mv3(self):
         (
             Tester(self.mv3, self.model_inputs)
             .export(Export(CaptureConfig(enable_aot=True)))
@@ -50,7 +50,7 @@ def test_mv3_fp32(self):
             .compare_outputs()
         )

-    def test_mv3_qs8_pt2e(self):
+    def test_qs8_mv3(self):
         ops_after_quantization = self.all_operators - {
             "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
         }
@@ -66,7 +66,7 @@ def test_mv3_qs8_pt2e(self):

         (
             Tester(self.mv3, self.model_inputs)
-            .quantize2()
+            .quantize()
             .export(Export(CaptureConfig(enable_aot=True)))
             .to_edge()
             .check(list(ops_after_quantization))

backends/xnnpack/test/ops/add.py

Lines changed: 2 additions & 2 deletions
@@ -46,7 +46,7 @@ def test_qs8_add(self):
         inputs = (torch.ones(1), torch.ones(1))
         (
             Tester(self.Add(), inputs)
-            .quantize2()
+            .quantize()
             .export()
             .check_count({"torch.ops.aten.add.Tensor": 4})
             .check(["torch.ops.quantized_decomposed"])
@@ -91,7 +91,7 @@ def test_qs8_add_relu(self):
         inputs = (torch.randn(1, 1, 4, 4), torch.randn(1, 1, 4, 4))
         (
             Tester(self.AddRelu(), inputs)
-            .quantize2()
+            .quantize()
             .export()
             .check_count({"torch.ops.aten.add.Tensor": 1})
             .check_count({"torch.ops.aten.relu.default": 1})

backends/xnnpack/test/tester/tester.py

Lines changed: 1 addition & 48 deletions
@@ -30,22 +30,9 @@
 from executorch.exir.backend.partitioner import Partitioner
 from executorch.exir.passes.spec_prop_pass import SpecPropPass

-from executorch.extension.pybindings.portable_lib import (
+from executorch.extension.pybindings.portable_lib import (  # @manual
     _load_for_executorch_from_buffer,
 )
-from torch.ao.quantization.backend_config import BackendConfig
-from torch.ao.quantization.backend_config.executorch import (
-    get_executorch_backend_config,
-)
-from torch.ao.quantization.qconfig_mapping import (
-    _get_symmetric_qnnpack_qconfig_mapping,
-    QConfigMapping,
-)
-
-from torch.ao.quantization.quantize_fx import (
-    _convert_to_reference_decomposed_fx,
-    prepare_fx,
-)
 from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
 from torch.ao.quantization.quantizer.quantizer import Quantizer
 from torch.ao.quantization.quantizer.xnnpack_quantizer import (
@@ -103,36 +90,6 @@ def register_stage(stage: Stage):

 @register_stage
 class Quantize(Stage):
-    def __init__(
-        self,
-        qconfig_mapping: Optional[QConfigMapping] = None,
-        backend_config: Optional[BackendConfig] = None,
-    ):
-        self.qconfig_mapping = (
-            qconfig_mapping or _get_symmetric_qnnpack_qconfig_mapping()
-        )
-        self.backend_config = backend_config or get_executorch_backend_config()
-        self.converted = None
-
-    def run(self, artifact: torch.nn.Module, inputs: Tuple[torch.Tensor]) -> None:
-        prepared = prepare_fx(
-            artifact, self.qconfig_mapping, inputs, backend_config=self.backend_config
-        )
-        self.converted = _convert_to_reference_decomposed_fx(
-            prepared, backend_config=self.backend_config
-        )
-
-    @property
-    def artifact(self) -> torch.fx.GraphModule:
-        return self.converted
-
-    @property
-    def graph_module(self) -> str:
-        return self.converted
-
-
-@register_stage
-class Quantize2(Stage):
     def __init__(
         self,
         quantizer: Optional[Quantizer] = None,
@@ -278,7 +235,6 @@ def __init__(
         self.inputs = inputs
         self.stages: Dict[str, Stage] = OrderedDict.fromkeys(list(_stages_.keys()))
         self.pipeline = {
-            self._stage_name(Quantize2): [self._stage_name(Export)],
             self._stage_name(Quantize): [self._stage_name(Export)],
             self._stage_name(Export): [
                 self._stage_name(ToEdge),
@@ -339,9 +295,6 @@ def quantize(self, quantize_stage: Optional[Quantize] = None):
     def export(self, export_stage: Optional[Export] = None):
         return self._run_stage(export_stage or Export(), self.inputs)

-    def quantize2(self, quantize_stage: Optional[Quantize2] = None):
-        return self._run_stage(quantize_stage or Quantize2(), self.inputs)
-
     def to_edge(self, to_edge_stage: Optional[ToEdge] = None):
         return self._run_stage(to_edge_stage or ToEdge())
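
With the FX stage removed, quantize() is the Tester's only quantization entry point and now runs the PT2E stage formerly reached via quantize2(). As a rough, self-contained sketch of how such a stage can wrap the PT2E calls (illustrative only; the real Quantize stage in tester.py registers itself via @register_stage and exposes the artifact/graph_module properties shown in the hunks above):

from typing import Optional, Tuple

import torch
from torch._export import capture_pre_autograd_graph
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.quantizer import Quantizer
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)


class QuantizeSketch:
    """Illustrative stand-in for the PT2E-backed Quantize stage."""

    def __init__(self, quantizer: Optional[Quantizer] = None):
        if quantizer is None:
            # Default mirrors the symmetric XNNPACK config imported in tester.py.
            quantizer = XNNPACKQuantizer()
            quantizer.set_global(get_symmetric_quantization_config())
        self.quantizer = quantizer
        self.converted = None

    def run(self, artifact: torch.nn.Module, inputs: Tuple[torch.Tensor, ...]) -> None:
        captured = capture_pre_autograd_graph(artifact, inputs)
        prepared = prepare_pt2e(captured, self.quantizer)
        prepared(*inputs)  # single calibration pass over the sample inputs
        self.converted = convert_pt2e(prepared)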
