@@ -56,7 +56,9 @@ def get_mps_partitioner(use_kv_cache: bool = False):
 
 
 def get_coreml_partitioner(
-    enable_state: bool = False, pt2e_quantize: Optional[str] = None
+    enable_state: bool = False,
+    embedding_quantize: Optional[str] = None,
+    pt2e_quantize: Optional[str] = None,
 ):
     try:
         import coremltools as ct
@@ -76,13 +78,19 @@ def get_coreml_partitioner(
     if enable_state:
         minimum_deployment_target = max(minimum_deployment_target, ct.target.iOS18)
     # In Core ML, quantization is introduced in iOS 16
-    if pt2e_quantize is not None:
+    if embedding_quantize is not None or pt2e_quantize is not None:
         minimum_deployment_target = max(minimum_deployment_target, ct.target.iOS16)
     # In Core ML, 8-bit activation quantization is introduced in iOS 17
-    if pt2e_quantize in ("coreml_8a_c8w", "coreml_baseline_8a_c8w"):
+    if (
+        (embedding_quantize is not None and int(embedding_quantize.split(",")[0]) == 8)
+        or pt2e_quantize in ("coreml_8a_c8w", "coreml_baseline_8a_c8w")
+    ):
         minimum_deployment_target = max(minimum_deployment_target, ct.target.iOS17)
     # In Core ML, 4-bit weight compression is introduced in iOS 18
-    if pt2e_quantize in ("coreml_c4w", "coreml_8a_c4w", "coreml_baseline_8a_c4w"):
+    if (
+        (embedding_quantize is not None and int(embedding_quantize.split(",")[0]) == 4)
+        or pt2e_quantize in ("coreml_c4w", "coreml_8a_c4w", "coreml_baseline_8a_c4w")
+    ):
         minimum_deployment_target = max(minimum_deployment_target, ct.target.iOS18)
 
     compile_specs = CoreMLBackend.generate_compile_specs(  # pyre-fixme[16]
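For context, a minimal call-site sketch with hypothetical values; the "<bitwidth>,<group_size>" string format for embedding_quantize is an assumption based on how the new branches above parse only the leading bit-width:

# Hypothetical usage; only the leading bit-width of embedding_quantize is inspected.
partitioner = get_coreml_partitioner(
    enable_state=False,
    embedding_quantize="4,32",  # leading "4" -> 4-bit embedding weights -> iOS 18 minimum
    pt2e_quantize=None,
)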