3
3
# CoreML backend for delegating an EdgeProgram to CoreML.
4
4
5
5
import json
6
+ import logging
6
7
7
8
import shutil
8
9
import uuid
14
15
from typing import Any , Dict , final , List , Optional , Tuple
15
16
16
17
import coremltools as ct
18
+ import coremltools .optimize as cto
17
19
import executorchcoreml
18
20
19
21
from executorch .exir .backend .backend_details import (
23
25
)
24
26
from executorch .exir .backend .compile_spec_schema import CompileSpec
25
27
28
# Module-level logger for the Core ML backend. Its threshold is pinned to
# WARNING, the level used by the backend's experimental-API notice.
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)
30
+
26
31
27
32
class COMPILE_SPEC_KEYS(Enum):
    """
    Keys under which the Core ML backend stores its options in compile specs.

    Each member's value is the string used as the `key` of a CompileSpec; the
    backend looks specs up by comparing `compile_spec.key` against these values.
    """

    COMPUTE_UNITS = "compute_units"
    MODEL_TYPE = "model_type"
    MIN_DEPLOYMENT_TARGET = "min_deployment_target"
    MODEL_COMPUTE_PRECISION = "model_compute_precision"
    # Payload is a UTF-8 encoded JSON dict that is turned into a
    # cto.coreml.OpLinearQuantizerConfig when the specs are parsed.
    OP_LINEAR_QUANTIZER_CONFIG = "op_linear_quantizer_config"
32
38
33
39
34
40
class MODEL_PATHS (Enum ):
@@ -169,12 +175,44 @@ def generate_compute_unit_compile_spec(
169
175
compute_unit .name .lower ().encode ("utf-8" ),
170
176
)
171
177
178
@staticmethod
def generate_op_linear_quantizer_config_compile_spec(
    op_linear_quantizer_config: Dict,
) -> CompileSpec:
    """
    Returns the compile spec representing the model post conversion quantization.

    Args:
        op_linear_quantizer_config: A JSON-serializable dict that will later be
            used to construct a cto.coreml.OpLinearQuantizerConfig.

    Returns:
        A CompileSpec keyed by COMPILE_SPEC_KEYS.OP_LINEAR_QUANTIZER_CONFIG whose
        value is the UTF-8 encoded JSON text of the dict.

    Raises:
        TypeError: Propagated from json.dumps if the dict holds values that
            cannot be serialized to JSON.
    """
    # Compile spec values are bytes, so the dict travels as UTF-8 encoded JSON.
    str_representation = json.dumps(op_linear_quantizer_config)
    byte_representation = str_representation.encode("utf-8")
    return CompileSpec(
        COMPILE_SPEC_KEYS.OP_LINEAR_QUANTIZER_CONFIG.value,
        byte_representation,
    )
192
+
193
@staticmethod
def op_linear_quantizer_config_from_compile_specs(
    compile_specs: List[CompileSpec],
) -> Optional[cto.coreml.OpLinearQuantizerConfig]:
    """
    Returns the model's post conversion quantization config by parsing the list
    of compile specs.

    Args:
        compile_specs: The compile specs handed to the backend.

    Returns:
        The OpLinearQuantizerConfig built from the first spec whose key is
        COMPILE_SPEC_KEYS.OP_LINEAR_QUANTIZER_CONFIG, or None when the list
        contains no such spec.
    """
    for compile_spec in compile_specs:
        if compile_spec.key != COMPILE_SPEC_KEYS.OP_LINEAR_QUANTIZER_CONFIG.value:
            continue

        # The spec value is the UTF-8 encoded JSON text of the config dict.
        config_dict = json.loads(compile_spec.value.decode("utf-8"))
        # NOTE(review): _from_dict is an underscore-prefixed coremltools helper;
        # confirm it is still available whenever coremltools is upgraded.
        return cto.coreml.OpLinearQuantizerConfig._from_dict(config_dict)

    return None
208
+
172
209
@staticmethod
173
210
def generate_compile_specs (
174
211
compute_unit : ct .ComputeUnit = ct .ComputeUnit .ALL ,
175
212
minimum_deployment_target : ct .target = ct .target .iOS15 ,
176
213
compute_precision : ct .precision = ct .precision .FLOAT16 ,
177
214
model_type : MODEL_TYPE = MODEL_TYPE .MODEL ,
215
+ op_linear_quantizer_config : Optional [Dict ] = None ,
178
216
) -> List [CompileSpec ]:
179
217
"""
180
218
Returns the list of compile specs that's used by CoreMLBackend to lower the module.
@@ -192,6 +230,12 @@ def generate_compile_specs(
192
230
CoreMLBackend .generate_compute_precision_compile_spec (compute_precision )
193
231
)
194
232
compile_specs .append (CoreMLBackend .generate_model_type_compile_spec (model_type ))
233
+ if op_linear_quantizer_config is not None :
234
+ compile_specs .append (
235
+ CoreMLBackend .generate_op_linear_quantizer_config_compile_spec (
236
+ op_linear_quantizer_config
237
+ )
238
+ )
195
239
196
240
return compile_specs
197
241
@@ -368,18 +412,18 @@ def preprocess(
368
412
compile_specs ,
369
413
)
370
414
)
371
-
372
415
model_compute_precision : ct .precision = (
373
416
CoreMLBackend .model_compute_precision_from_compile_specs (compile_specs )
374
417
)
375
-
376
418
minimum_deployment_target : ct .target = (
377
419
CoreMLBackend .min_deployment_target_from_compile_specs (compile_specs )
378
420
)
379
-
380
421
compute_units : ct .ComputeUnit = CoreMLBackend .compute_unit_from_compile_specs (
381
422
compile_specs
382
423
)
424
+ op_linear_quantizer_config = (
425
+ CoreMLBackend .op_linear_quantizer_config_from_compile_specs (compile_specs )
426
+ )
383
427
384
428
mlmodel = ct .convert (
385
429
model = edge_program ,
@@ -392,4 +436,15 @@ def preprocess(
392
436
compute_units = compute_units ,
393
437
)
394
438
439
+ if op_linear_quantizer_config is not None :
440
+ logger .warning (
441
+ "Core ML Backend op_linear_quantizer_config API is experimental"
442
+ )
443
+ config = cto .coreml .OptimizationConfig (
444
+ global_config = op_linear_quantizer_config ,
445
+ # skip embedding
446
+ op_type_configs = {"gather" : None },
447
+ )
448
+ mlmodel = cto .coreml .linear_quantize_weights (mlmodel , config = config )
449
+
395
450
return CoreMLBackend .preprocess_model (mlmodel , model_type = model_type )
0 commit comments