Skip to content

Commit 90684ba

Browse files
mcremon-metafacebook-github-bot
authored and committed
Use int8 quantizer in the OSS flow
Summary: As titled. This change adds the ability to supply a qconfig to the `CadenceQuantizer`, and uses `int8` instead of `uint8` in `export_model`. Differential Revision: D64209639
1 parent 5696b35 commit 90684ba

File tree

2 files changed

+45
-7
lines changed

2 files changed

+45
-7
lines changed

backends/cadence/aot/export_example.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,42 @@
1717
export_to_cadence,
1818
fuse_pt2,
1919
)
20+
2021
from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer
2122
from executorch.backends.cadence.runtime import runtime
2223
from executorch.backends.cadence.runtime.executor import BundledProgramManager
2324
from executorch.exir import ExecutorchProgramManager
2425
from torch import nn
26+
from torch.ao.quantization.observer import HistogramObserver, MinMaxObserver
27+
from torch.ao.quantization.quantizer.xnnpack_quantizer_utils import (
28+
QuantizationConfig,
29+
QuantizationSpec,
30+
)
2531

2632
from .utils import save_bpte_program, save_pte_program
2733

2834

2935
FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
3036
logging.basicConfig(level=logging.INFO, format=FORMAT)
3137

38+
act_qspec = QuantizationSpec(
39+
dtype=torch.int8,
40+
quant_min=-128,
41+
quant_max=127,
42+
qscheme=torch.per_tensor_affine,
43+
is_dynamic=False,
44+
observer_or_fake_quant_ctr=HistogramObserver.with_args(eps=2**-12),
45+
)
46+
47+
wgt_qspec = QuantizationSpec(
48+
dtype=torch.int8,
49+
quant_min=-128,
50+
quant_max=127,
51+
qscheme=torch.per_tensor_affine,
52+
is_dynamic=False,
53+
observer_or_fake_quant_ctr=MinMaxObserver,
54+
)
55+
3256

3357
def export_model(
3458
model: nn.Module,
@@ -39,8 +63,15 @@ def export_model(
3963
working_dir = tempfile.mkdtemp(dir="/tmp")
4064
logging.debug(f"Created work directory {working_dir}")
4165

66+
qconfig = QuantizationConfig(
67+
act_qspec,
68+
act_qspec,
69+
wgt_qspec,
70+
None,
71+
)
72+
4273
# Instantiate the quantizer
43-
quantizer = CadenceQuantizer()
74+
quantizer = CadenceQuantizer(qconfig)
4475

4576
# Convert the model
4677
converted_model = convert_pt2(model, example_inputs, quantizer)

backends/cadence/aot/quantizer/quantizer.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -141,13 +141,20 @@ def get_supported_operators(cls) -> List[OperatorConfig]:
141141

142142

143143
class CadenceQuantizer(ComposableQuantizer):
144-
def __init__(self) -> None:
145-
static_qconfig = QuantizationConfig(
146-
act_qspec,
147-
act_qspec,
148-
wgt_qspec,
149-
None,
144+
def __init__(
145+
self, quantization_config: Optional[QuantizationConfig] = None
146+
) -> None:
147+
static_qconfig = (
148+
QuantizationConfig(
149+
act_qspec,
150+
act_qspec,
151+
wgt_qspec,
152+
None,
153+
)
154+
if not quantization_config
155+
else quantization_config
150156
)
157+
151158
super().__init__(
152159
[
153160
CadenceAtenQuantizer(AddmmPattern(), static_qconfig),

0 commit comments

Comments
 (0)