
Commit 78b7bf9

mcremon-meta authored and facebook-github-bot committed
Use symmetric weights for convs and int8 in the default quantizer
Summary: As titled. int8 should give better performance with the Cadence kernels, since uint8 is no longer being improved there. The upcoming (quantized) convolution kernel needs symmetric weights, so we make that change as well.

Differential Revision: D69405797
1 parent 8665a50 commit 78b7bf9

File tree

2 files changed: +41 additions, -61 deletions
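For context on what the switch to symmetric int8 weights means in PyTorch observer terms, here is a minimal sketch. It is not part of this commit, and it assumes a PyTorch version whose observers accept torch.int8 (as the specs in this diff do): with qscheme=torch.per_tensor_symmetric the zero point is pinned to 0, which is what a kernel expecting symmetric weights relies on.

# Minimal sketch, not part of this commit: symmetric vs. affine int8 weight
# quantization params as produced by MinMaxObserver. The random tensor is a
# purely illustrative stand-in for a conv weight.
import torch
from torch.ao.quantization.observer import MinMaxObserver

w = torch.randn(16, 8)

affine = MinMaxObserver(
    dtype=torch.int8, qscheme=torch.per_tensor_affine, quant_min=-128, quant_max=127
)
symmetric = MinMaxObserver(
    dtype=torch.int8, qscheme=torch.per_tensor_symmetric, quant_min=-128, quant_max=127
)
affine(w)      # record min/max statistics
symmetric(w)

print(affine.calculate_qparams())     # (scale, zero_point): zero_point generally != 0
print(symmetric.calculate_qparams())  # (scale, zero_point): zero_point is 0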

backends/cadence/aot/export_example.py

Lines changed: 1 addition & 33 deletions
@@ -9,8 +9,6 @@
 import logging
 import tempfile
 
-import torch
-
 from executorch.backends.cadence.aot.ops_registrations import *  # noqa
 from typing import Any, Tuple
 
@@ -23,38 +21,15 @@
 from executorch.backends.cadence.aot.quantizer.quantizer import CadenceDefaultQuantizer
 from executorch.backends.cadence.runtime import runtime
 from executorch.backends.cadence.runtime.executor import BundledProgramManager
-from executorch.backends.xnnpack.quantizer.xnnpack_quantizer_utils import (
-    QuantizationConfig,
-    QuantizationSpec,
-)
 from executorch.exir import ExecutorchProgramManager
 from torch import nn
-from torch.ao.quantization.observer import HistogramObserver, MinMaxObserver
 
 from .utils import save_bpte_program, save_pte_program
 
 
 FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
 logging.basicConfig(level=logging.INFO, format=FORMAT)
 
-act_qspec = QuantizationSpec(
-    dtype=torch.int8,
-    quant_min=-128,
-    quant_max=127,
-    qscheme=torch.per_tensor_affine,
-    is_dynamic=False,
-    observer_or_fake_quant_ctr=HistogramObserver.with_args(eps=2**-12),
-)
-
-wgt_qspec = QuantizationSpec(
-    dtype=torch.int8,
-    quant_min=-128,
-    quant_max=127,
-    qscheme=torch.per_tensor_affine,
-    is_dynamic=False,
-    observer_or_fake_quant_ctr=MinMaxObserver,
-)
-
 
 def export_model(
     model: nn.Module,
@@ -66,15 +41,8 @@ def export_model(
     working_dir = tempfile.mkdtemp(dir="/tmp")
     logging.debug(f"Created work directory {working_dir}")
 
-    qconfig = QuantizationConfig(
-        act_qspec,
-        act_qspec,
-        wgt_qspec,
-        None,
-    )
-
     # Instantiate the quantizer
-    quantizer = CadenceDefaultQuantizer(qconfig)
+    quantizer = CadenceDefaultQuantizer()
 
     # Convert the model
     converted_model = convert_pt2(model, example_inputs, quantizer)
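With the qspec/qconfig boilerplate gone, the export example relies entirely on the quantizer's defaults. A hedged sketch of the resulting call site (model and example_inputs stand in for whatever the caller already has in scope):

# Call-site sketch after this change: the default quantizer now carries the
# int8 / symmetric-conv-weight configuration internally.
quantizer = CadenceDefaultQuantizer()
converted_model = convert_pt2(model, example_inputs, quantizer)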

backends/cadence/aot/quantizer/quantizer.py

Lines changed: 40 additions & 28 deletions
@@ -40,33 +40,48 @@
 from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer
 
 
-act_qspec = QuantizationSpec(
-    dtype=torch.uint8,
-    quant_min=0,
-    quant_max=255,
+act_qspec_asym8u = QuantizationSpec(
+    dtype=torch.int8,
+    quant_min=-128,
+    quant_max=127,
     qscheme=torch.per_tensor_affine,
     is_dynamic=False,
     observer_or_fake_quant_ctr=HistogramObserver.with_args(eps=2**-12),
 )
 
-wgt_qspec = QuantizationSpec(
-    dtype=torch.uint8,
-    quant_min=0,
-    quant_max=255,
+wgt_qspec_asym8u = QuantizationSpec(
+    dtype=torch.int8,
+    quant_min=-128,
+    quant_max=127,
     qscheme=torch.per_tensor_affine,
     is_dynamic=False,
     observer_or_fake_quant_ctr=MinMaxObserver,
 )
 
+wgt_qspec_asym8s = QuantizationSpec(
+    dtype=torch.int8,
+    quant_min=-128,
+    quant_max=127,
+    qscheme=torch.per_tensor_symmetric,
+    is_dynamic=False,
+    observer_or_fake_quant_ctr=MinMaxObserver,
+)
+
 bias_qspec: Optional[QuantizationSpec] = None
 
-_default_qconfig = QuantizationConfig(
-    act_qspec,
-    act_qspec,
-    wgt_qspec,
+qconfig_A8uW8u = QuantizationConfig(
+    act_qspec_asym8u,
+    act_qspec_asym8u,
+    wgt_qspec_asym8u,
     None,
 )
 
+qconfig_A8uW8s = QuantizationConfig(
+    act_qspec_asym8u,
+    act_qspec_asym8u,
+    wgt_qspec_asym8s,
+    None,
+)
 
 class CadenceAtenQuantizer(Quantizer):
     def __init__(
@@ -147,19 +162,17 @@ def get_supported_operators(cls) -> List[OperatorConfig]:
         return []
 
 
-def get_cadence_default_quantizer_list_with_config(
-    quantization_config: QuantizationConfig,
-) -> List[Quantizer]:
+def get_cadence_default_quantizers() -> List[Quantizer]:
     return [
-        CadenceAtenQuantizer(AddmmPattern(), quantization_config),
-        CadenceAtenQuantizer(BmmPattern(), quantization_config),
-        CadenceAtenQuantizer(Conv1dPattern(), quantization_config),
-        CadenceAtenQuantizer(Conv2dPattern(), quantization_config),
-        CadenceAtenQuantizer(LayerNormPattern(), quantization_config),
-        CadenceAtenQuantizer(LinearPattern(), quantization_config),
-        CadenceAtenQuantizer(MatmulPattern(), quantization_config),
-        CadenceAtenQuantizer(ReluPattern0(), quantization_config),
-        CadenceAtenQuantizer(ReluPattern1(), quantization_config),
+        CadenceAtenQuantizer(AddmmPattern(), qconfig_A8uW8u),
+        CadenceAtenQuantizer(BmmPattern(), qconfig_A8uW8u),
+        CadenceAtenQuantizer(Conv1dPattern(), qconfig_A8uW8s),
+        CadenceAtenQuantizer(Conv2dPattern(), qconfig_A8uW8s),
+        CadenceAtenQuantizer(LayerNormPattern(), qconfig_A8uW8u),
+        CadenceAtenQuantizer(LinearPattern(), qconfig_A8uW8u),
+        CadenceAtenQuantizer(MatmulPattern(), qconfig_A8uW8u),
+        CadenceAtenQuantizer(ReluPattern0(), qconfig_A8uW8u),
+        CadenceAtenQuantizer(ReluPattern1(), qconfig_A8uW8u),
     ]
 
 
@@ -178,10 +191,9 @@ class CadenceDefaultQuantizer(CadenceQuantizer):
     Default quantizer for Cadence backend.
     """
 
-    def __init__(self, qconfig: Optional[QuantizationConfig] = None) -> None:
-        if qconfig is None:
-            qconfig = _default_qconfig
-        quantizers = get_cadence_default_quantizer_list_with_config(qconfig)
+    def __init__(self, quantizers: Optional[list[Quantizer]] = None) -> None:
+        if quantizers is None:
+            quantizers = get_cadence_default_quantizers()
         super().__init__(quantizers)
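Since the constructor now takes an optional list of quantizers instead of a QuantizationConfig, callers that previously passed a custom qconfig would build the quantizer list themselves. A hedged sketch of that usage follows; the pattern import path is an assumption, while the class and qconfig names are the ones appearing in this diff.

# Sketch only: customizing the new API by supplying an explicit quantizer list.
# The patterns import path is assumed; qconfig_A8uW8s and the classes below are
# the names introduced or used in this diff.
from executorch.backends.cadence.aot.quantizer.patterns import (
    Conv1dPattern,
    Conv2dPattern,
    LinearPattern,
)
from executorch.backends.cadence.aot.quantizer.quantizer import (
    CadenceAtenQuantizer,
    CadenceDefaultQuantizer,
    qconfig_A8uW8s,
)

# e.g. quantize only convs and linear layers, all with symmetric int8 weights
quantizers = [
    CadenceAtenQuantizer(Conv1dPattern(), qconfig_A8uW8s),
    CadenceAtenQuantizer(Conv2dPattern(), qconfig_A8uW8s),
    CadenceAtenQuantizer(LinearPattern(), qconfig_A8uW8s),
]
quantizer = CadenceDefaultQuantizer(quantizers=quantizers)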
