8
8
import os
9
9
import shutil
10
10
import subprocess
11
- from itertools import islice
12
11
from pathlib import Path
13
12
14
13
import executorch
15
14
16
- import nncf
15
+ import nncf . torch
17
16
import numpy as np
18
17
import timm
19
18
import torch
20
19
import torchvision .models as torchvision_models
21
- from executorch .backends .openvino import OpenVINOQuantizer
22
20
from executorch .backends .openvino .partitioner import OpenvinoPartitioner
21
+ from executorch .backends .openvino .quantizer .quantizer import quantize_model
23
22
from executorch .exir import EdgeProgramManager , to_edge_transform_and_lower
24
23
from executorch .exir .backend .backend_details import CompileSpec
25
- from nncf .experimental .torch .fx .quantization .quantize_pt2e import quantize_pt2e
26
24
from sklearn .metrics import accuracy_score
27
25
from timm .data import resolve_data_config
28
26
from timm .data .transforms_factory import create_transform
29
- from torch .ao .quantization .quantize_pt2e import convert_pt2e , prepare_pt2e
30
27
from torch .export import export
31
28
from torch .export .exported_program import ExportedProgram
32
29
from torchvision import datasets
@@ -129,55 +126,6 @@ def dump_inputs(calibration_dataset, dest_path):
129
126
return input_files , targets
130
127
131
128
132
- def quantize_model (
133
- captured_model : torch .fx .GraphModule ,
134
- calibration_dataset : torch .utils .data .DataLoader ,
135
- use_nncf : bool ,
136
- ) -> torch .fx .GraphModule :
137
- """
138
- Quantizes a model using either NNCF-based or PTQ-based quantization.
139
-
140
- :param captured_model: The model to be quantized, represented as a torch.fx.GraphModule.
141
- :param calibration_dataset: A DataLoader containing calibration data for quantization.
142
- :param use_nncf: Whether to use NNCF-based quantization (True) or standard PTQ (False).
143
- :return: The quantized model as a torch.fx.GraphModule.
144
- """
145
- quantizer = OpenVINOQuantizer ()
146
-
147
- print ("PTQ: Quantize the model" )
148
- default_subset_size = 300
149
- batch_size = calibration_dataset .batch_size
150
- subset_size = (default_subset_size // batch_size ) + int (
151
- default_subset_size % batch_size > 0
152
- )
153
-
154
- def transform (x ):
155
- return x [0 ]
156
-
157
- if use_nncf :
158
-
159
- quantized_model = quantize_pt2e (
160
- captured_model ,
161
- quantizer ,
162
- subset_size = subset_size ,
163
- calibration_dataset = nncf .Dataset (
164
- calibration_dataset , transform_func = transform
165
- ),
166
- fold_quantize = False ,
167
- )
168
- else :
169
- annotated_model = prepare_pt2e (captured_model , quantizer )
170
-
171
- print ("PTQ: Calibrate the model..." )
172
- for data in islice (calibration_dataset , subset_size ):
173
- annotated_model (transform (data ))
174
-
175
- print ("PTQ: Convert the quantized model..." )
176
- quantized_model = convert_pt2e (annotated_model , fold_quantize = False )
177
-
178
- return quantized_model
179
-
180
-
181
129
def validate_model (
182
130
model_file_name : str , calibration_dataset : torch .utils .data .DataLoader
183
131
) -> float :
@@ -231,7 +179,6 @@ def main(
231
179
dataset_path : str ,
232
180
device : str ,
233
181
batch_size : int ,
234
- quantization_flow : str ,
235
182
):
236
183
"""
237
184
Main function to load, quantize, and validate a model.
@@ -244,7 +191,6 @@ def main(
244
191
:param dataset_path: Path to the dataset for calibration/validation.
245
192
:param device: The device to run the model on (e.g., "cpu", "gpu").
246
193
:param batch_size: Batch size for dataset loading.
247
- :param quantization_flow: The quantization method to use.
248
194
"""
249
195
250
196
# Load the selected model
@@ -281,7 +227,6 @@ def main(
281
227
quantized_model = quantize_model (
282
228
aten_dialect .module (),
283
229
calibration_dataset ,
284
- use_nncf = quantization_flow == "nncf" ,
285
230
)
286
231
287
232
aten_dialect : ExportedProgram = export (quantized_model , example_args )
@@ -360,15 +305,6 @@ def main(
360
305
default = "CPU" ,
361
306
help = "Target device for compiling the model (e.g., CPU, GPU). Default is CPU." ,
362
307
)
363
- parser .add_argument (
364
- "--quantization_flow" ,
365
- type = str ,
366
- choices = ["pt2e" , "nncf" ],
367
- default = "nncf" ,
368
- help = "Select the quantization flow (nncf or pt2e):"
369
- " pt2e is the default torch.ao quantization flow, while"
370
- " nncf is a custom method with additional algorithms to improve model performance." ,
371
- )
372
308
373
309
args = parser .parse_args ()
374
310
@@ -384,5 +320,4 @@ def main(
384
320
args .dataset ,
385
321
args .device ,
386
322
args .batch_size ,
387
- args .quantization_flow ,
388
323
)
0 commit comments