Skip to content

Use int8 quantizer in the OSS flow #6166

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion backends/cadence/aot/export_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import logging
import tempfile

import torch

from executorch.backends.cadence.aot.ops_registrations import * # noqa
from typing import Any, Tuple

Expand All @@ -17,18 +19,42 @@
export_to_cadence_edge_executorch,
fuse_pt2,
)

from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer
from executorch.backends.cadence.runtime import runtime
from executorch.backends.cadence.runtime.executor import BundledProgramManager
from executorch.exir import ExecutorchProgramManager
from torch import nn
from torch.ao.quantization.observer import HistogramObserver, MinMaxObserver
from torch.ao.quantization.quantizer.xnnpack_quantizer_utils import (
QuantizationConfig,
QuantizationSpec,
)

from .utils import save_bpte_program, save_pte_program


FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
logging.basicConfig(level=logging.INFO, format=FORMAT)

act_qspec = QuantizationSpec(
dtype=torch.int8,
quant_min=-128,
quant_max=127,
qscheme=torch.per_tensor_affine,
is_dynamic=False,
observer_or_fake_quant_ctr=HistogramObserver.with_args(eps=2**-12),
)

wgt_qspec = QuantizationSpec(
dtype=torch.int8,
quant_min=-128,
quant_max=127,
qscheme=torch.per_tensor_affine,
is_dynamic=False,
observer_or_fake_quant_ctr=MinMaxObserver,
)


def export_model(
model: nn.Module,
Expand All @@ -39,8 +65,15 @@ def export_model(
working_dir = tempfile.mkdtemp(dir="/tmp")
logging.debug(f"Created work directory {working_dir}")

qconfig = QuantizationConfig(
act_qspec,
act_qspec,
wgt_qspec,
None,
)

# Instantiate the quantizer
quantizer = CadenceQuantizer()
quantizer = CadenceQuantizer(qconfig)

# Convert the model
converted_model = convert_pt2(model, example_inputs, quantizer)
Expand Down
19 changes: 13 additions & 6 deletions backends/cadence/aot/quantizer/quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,20 @@ def get_supported_operators(cls) -> List[OperatorConfig]:


class CadenceQuantizer(ComposableQuantizer):
def __init__(self) -> None:
static_qconfig = QuantizationConfig(
act_qspec,
act_qspec,
wgt_qspec,
None,
def __init__(
self, quantization_config: Optional[QuantizationConfig] = None
) -> None:
static_qconfig = (
QuantizationConfig(
act_qspec,
act_qspec,
wgt_qspec,
None,
)
if not quantization_config
else quantization_config
)

super().__init__(
[
CadenceAtenQuantizer(AddmmPattern(), static_qconfig),
Expand Down
Loading