We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 7b3549b commit dc164f5 Copy full SHA for dc164f5
examples/models/llava/export_llava.py
@@ -208,10 +208,15 @@ def export_all(llava_model: LlavaModel):
208
partitioner={
209
"image_encoder": [XnnpackPartitioner()],
210
"text_model": [
211
+ # Partition the DQLinear nodes first, then partition the remaining nodes,
212
+ # so that no partition contains more than one DQLinear node.
213
+ # This avoids holding multiple unpacked and packed weight buffers in memory
214
+ # at the same time, which reduces the peak memory footprint.
215
XnnpackPartitioner(
216
config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,
217
per_op_mode=True,
- )
218
+ ),
219
+ XnnpackPartitioner(),
220
],
221
},
222
compile_config=EdgeCompileConfig(_check_ir_validity=False),
0 commit comments