@@ -56,7 +56,9 @@ def get_mps_partitioner(use_kv_cache: bool = False):
56
56
57
57
58
58
def get_coreml_partitioner (
59
- enable_state : bool = False , pt2e_quantize : Optional [str ] = None
59
+ enable_state : bool = False ,
60
+ embedding_quantize : Optional [str ] = None ,
61
+ pt2e_quantize : Optional [str ] = None ,
60
62
):
61
63
try :
62
64
import coremltools as ct
@@ -76,13 +78,17 @@ def get_coreml_partitioner(
76
78
if enable_state :
77
79
minimum_deployment_target = max (minimum_deployment_target , ct .target .iOS18 )
78
80
# In Core ML, quantization is introduced in iOS 16
79
- if pt2e_quantize is not None :
81
+ if embedding_quantize is not None or pt2e_quantize is not None :
80
82
minimum_deployment_target = max (minimum_deployment_target , ct .target .iOS16 )
81
83
# In Core ML, 8-bit activation quantization is introduced in iOS 17
82
- if pt2e_quantize in ("coreml_8a_c8w" , "coreml_baseline_8a_c8w" ):
84
+ if (
85
+ embedding_quantize is not None and int (embedding_quantize .split ("," )[0 ]) == 8
86
+ ) or pt2e_quantize in ("coreml_8a_c8w" , "coreml_baseline_8a_c8w" ):
83
87
minimum_deployment_target = max (minimum_deployment_target , ct .target .iOS17 )
84
88
# In Core ML, 4-bit weight compression is introduced in iOS 18
85
- if pt2e_quantize in ("coreml_c4w" , "coreml_8a_c4w" , "coreml_baseline_8a_c4w" ):
89
+ if (
90
+ embedding_quantize is not None and int (embedding_quantize .split ("," )[0 ]) == 4
91
+ ) or pt2e_quantize in ("coreml_c4w" , "coreml_8a_c4w" , "coreml_baseline_8a_c4w" ):
86
92
minimum_deployment_target = max (minimum_deployment_target , ct .target .iOS18 )
87
93
88
94
compile_specs = CoreMLBackend .generate_compile_specs ( # pyre-fixme[16]
0 commit comments