@@ -121,21 +121,22 @@ def forward(self, images):
121
121
llava_image_encode = LlavaImageEncoder (llava )
122
122
123
123
# quantizer
124
- linear_quantizer = XNNPACKQuantizer ()
125
- operator_config_dynamic = get_symmetric_quantization_config (
126
- is_per_channel = True , is_dynamic = True
127
- )
128
- linear_quantizer .set_global (operator_config_dynamic )
124
+ quantizer = XNNPACKQuantizer ()
125
+ quantizer .set_global (get_symmetric_quantization_config ())
129
126
130
- manager = LlavaEdgeManager (
131
- model = llava_image_encode ,
132
- modelname = "llava_image_encoder" ,
133
- max_seq_len = llava .text_model_args .max_seq_len , # This may not be right
134
- dtype = DType .fp32 ,
135
- use_kv_cache = True ,
136
- example_inputs = (resized ,),
137
- dynamic_shapes = dynamic_shapes ,
138
- ).capture_pre_autograd_graph ()
127
+ manager = (
128
+ LlavaEdgeManager (
129
+ model = llava_image_encode ,
130
+ modelname = "llava_image_encoder" ,
131
+ max_seq_len = llava .text_model_args .max_seq_len , # This may not be right
132
+ dtype = DType .fp32 ,
133
+ use_kv_cache = True ,
134
+ example_inputs = (resized ,),
135
+ dynamic_shapes = dynamic_shapes ,
136
+ )
137
+ .capture_pre_autograd_graph ()
138
+ .pt2e_quantize ([quantizer ])
139
+ )
139
140
140
141
# lower to executorch
141
142
with torch .no_grad ():
@@ -186,9 +187,11 @@ def main():
186
187
llava_model = LlavaModel (use_sdpa_with_kv_cache_op = args .use_sdpa_with_kv_cache )
187
188
llava = llava_model .get_eager_model ()
188
189
189
- prompt_before_image , resized , prompt_after_image = (
190
- llava_model .get_inputs_for_prefill ()
191
- )
190
+ (
191
+ prompt_before_image ,
192
+ resized ,
193
+ prompt_after_image ,
194
+ ) = llava_model .get_inputs_for_prefill ()
192
195
193
196
image_encoder_ep = export_image_encoder (
194
197
llava , resized , llava_model ._get_image_dynamic_shapes ()
@@ -211,9 +214,7 @@ def main():
211
214
"text_model" : text_model_ep ,
212
215
},
213
216
partitioner = {
214
- "image_encoder" : [
215
- XnnpackPartitioner (config_precisions = ConfigPrecisionType .FP32 )
216
- ],
217
+ "image_encoder" : [XnnpackPartitioner ()],
217
218
"text_model" : [
218
219
XnnpackPartitioner (
219
220
config_precisions = ConfigPrecisionType .DYNAMIC_QUANT ,
0 commit comments