We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 40720f0 commit 030fc3fCopy full SHA for 030fc3f
examples/models/llava/export_llava.py
@@ -211,10 +211,15 @@ def export_all(llava_model: LlavaModel):
211
partitioner={
212
"image_encoder": [XnnpackPartitioner()],
213
"text_model": [
214
+ # First partition the DQLinear nodes, then partition the rest of the nodes.
215
+ # This keeps multiple DQLinear nodes out of the same partition,
216
+ # so that multiple unpacked and packed weight buffers are not held in memory
217
+ # at the same time, which reduces the peak memory footprint.
218
XnnpackPartitioner(
219
config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,
220
per_op_mode=True,
- )
221
+ ),
222
+ XnnpackPartitioner(),
223
],
224
},
225
compile_config=EdgeCompileConfig(_check_ir_validity=False),
0 commit comments