9
9
import logging
10
10
import tempfile
11
11
12
+ import torch
13
+
12
14
from executorch .backends .cadence .aot .ops_registrations import * # noqa
13
15
from typing import Any , Tuple
14
16
17
19
export_to_cadence_edge_executorch ,
18
20
fuse_pt2 ,
19
21
)
22
+
20
23
from executorch .backends .cadence .aot .quantizer .quantizer import CadenceQuantizer
21
24
from executorch .backends .cadence .runtime import runtime
22
25
from executorch .backends .cadence .runtime .executor import BundledProgramManager
23
26
from executorch .exir import ExecutorchProgramManager
24
27
from torch import nn
28
+ from torch .ao .quantization .observer import HistogramObserver , MinMaxObserver
29
+ from torch .ao .quantization .quantizer .xnnpack_quantizer_utils import (
30
+ QuantizationConfig ,
31
+ QuantizationSpec ,
32
+ )
25
33
26
34
from .utils import save_bpte_program , save_pte_program
27
35
28
36
29
37
# Log-line layout: "[LEVEL timestamp file:line] message".
FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"

# Configure the root logger once at import time so all module loggers
# inherit the same format and INFO threshold.
logging.basicConfig(format=FORMAT, level=logging.INFO)
31
39
40
+ act_qspec = QuantizationSpec (
41
+ dtype = torch .int8 ,
42
+ quant_min = - 128 ,
43
+ quant_max = 127 ,
44
+ qscheme = torch .per_tensor_affine ,
45
+ is_dynamic = False ,
46
+ observer_or_fake_quant_ctr = HistogramObserver .with_args (eps = 2 ** - 12 ),
47
+ )
48
+
49
+ wgt_qspec = QuantizationSpec (
50
+ dtype = torch .int8 ,
51
+ quant_min = - 128 ,
52
+ quant_max = 127 ,
53
+ qscheme = torch .per_tensor_affine ,
54
+ is_dynamic = False ,
55
+ observer_or_fake_quant_ctr = MinMaxObserver ,
56
+ )
57
+
32
58
33
59
def export_model (
34
60
model : nn .Module ,
@@ -39,8 +65,15 @@ def export_model(
39
65
working_dir = tempfile .mkdtemp (dir = "/tmp" )
40
66
logging .debug (f"Created work directory { working_dir } " )
41
67
68
+ qconfig = QuantizationConfig (
69
+ act_qspec ,
70
+ act_qspec ,
71
+ wgt_qspec ,
72
+ None ,
73
+ )
74
+
42
75
# Instantiate the quantizer
43
- quantizer = CadenceQuantizer ()
76
+ quantizer = CadenceQuantizer (qconfig )
44
77
45
78
# Convert the model
46
79
converted_model = convert_pt2 (model , example_inputs , quantizer )
0 commit comments