Skip to content

Update phi-3-mini to use the export library #4190

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 28 additions & 47 deletions examples/models/phi-3-mini/export_phi-3-mini.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,66 +5,47 @@
# LICENSE file in the root directory of this source tree.

import torch
from executorch.extension.llm.export.builder import DType, LLMEdgeManager

from executorch.backends.transforms.duplicate_dynamic_quant_chain import (
DuplicateDynamicQuantChainPass,
)
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.backends.xnnpack.utils.configs import get_xnnpack_edge_compile_config
from executorch.exir import to_edge
from torch._export import capture_pre_autograd_graph
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

from torch.ao.quantization.quantizer.xnnpack_quantizer import (
get_symmetric_quantization_config,
XNNPACKQuantizer,
from executorch.extension.llm.export.partitioner_lib import get_xnnpack_partitioner
from executorch.extension.llm.export.quantizer_lib import (
DynamicQuantLinearOptions,
get_pt2e_quantizers,
PT2EQuantOptions,
)

from transformers import Phi3ForCausalLM


# NOTE(review): this span is a rendered diff of main() whose +/- markers and
# indentation were lost, so the pre-change and post-change bodies are
# interleaved and the text is not runnable as-is. The "removed" / "added"
# labels below are inferred from the hunk header (-5,66 +5,47; 28 additions,
# 47 deletions) -- confirm against the actual diff. Lines without a label are
# context shared by both versions.
#
# Both versions load the Phi-3-mini checkpoint, quantize it, lower it through
# the XNNPACK partitioner, and emit an ExecuTorch program named
# "phi-3-mini.pte".
def main() -> None:
# removed: previous RNG seeding.
torch.random.manual_seed(0)
# added: replacement RNG seeding (different seed value).
torch.manual_seed(42)

# pyre-ignore: Undefined attribute [16]: Module `transformers` has no attribute `Phi3ForCausalLM`
model = Phi3ForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

# removed (through the capture_pre_autograd_graph arguments below): the
# hand-written export pipeline.
# Example input is a (1, 100) long tensor of random ids in [0, 100); dim 1 of
# `input_ids` is declared dynamic with max=128.
example_inputs = (torch.randint(0, 100, (1, 100), dtype=torch.long),)
dynamic_shape = {"input_ids": {1: torch.export.Dim("sequence_length", max=128)}}

# Symmetric, per-channel, dynamic quantization config applied globally.
xnnpack_quant_config = get_symmetric_quantization_config(
is_per_channel=True, is_dynamic=True
)
xnnpack_quantizer = XNNPACKQuantizer()
xnnpack_quantizer.set_global(xnnpack_quant_config)

# Graph capture runs with only the MATH SDPA backend enabled and autograd off.
with torch.nn.attention.sdpa_kernel(
[torch.nn.attention.SDPBackend.MATH]
), torch.no_grad():
model = capture_pre_autograd_graph(
model, example_inputs, dynamic_shapes=dynamic_shape
# added (through `verbose=True,`): the LLMEdgeManager-based pipeline. The
# keyword values mirror the removed code's settings (max length 128, same
# example input, dynamic shape enabled).
modelname = "phi-3-mini"

(
LLMEdgeManager(
model=model,
modelname=modelname,
max_seq_len=128,
dtype=DType.fp32,
use_kv_cache=False,
example_inputs=(torch.randint(0, 100, (1, 100), dtype=torch.long),),
enable_dynamic_shape=True,
verbose=True,
# context: this ")" closes the removed capture_pre_autograd_graph(...) call in
# the old version and the LLMEdgeManager(...) call in the new one.
)
# removed (through `pre_dispatch=False,`): PT2E prepare -> one forward pass on
# the example inputs -> convert (fold_quantize=False) -> duplicate dynamic
# quant chains, then export via the private torch.export._trace._export.
model = prepare_pt2e(model, xnnpack_quantizer)
model(*example_inputs)
model = convert_pt2e(model, fold_quantize=False)
DuplicateDynamicQuantChainPass()(model)
# TODO(lunwenh): update it to use export once
# https://github.com/pytorch/pytorch/issues/128394 is resolved.
model = torch.export._trace._export(
model,
example_inputs,
dynamic_shapes=dynamic_shape,
strict=False,
pre_dispatch=False,
# added (through the get_pt2e_quantizers line): builder-chain steps that set
# the output directory to ".", capture the graph, and apply PT2E quantizers
# built from DynamicQuantLinearOptions.
.set_output_dir(".")
.capture_pre_autograd_graph()
.pt2e_quantize(
get_pt2e_quantizers(PT2EQuantOptions(None, DynamicQuantLinearOptions()))
# context: this ")" closes the removed _export(...) call in the old version and
# the .pt2e_quantize(...) call in the new one.
)

# removed (through `file.write(...)`): manual edge lowering, XNNPACK
# delegation with dynamic shapes, and writing the program buffer to
# "phi-3-mini.pte".
edge_config = get_xnnpack_edge_compile_config()
edge_manager = to_edge(model, compile_config=edge_config)
edge_manager = edge_manager.to_backend(XnnpackPartitioner(has_dynamic_shapes=True))
et_program = edge_manager.to_executorch()

with open("phi-3-mini.pte", "wb") as file:
file.write(et_program.buffer)
# added: remaining builder-chain steps; presumably save_to_pte writes the
# program under the output directory set above -- verify against the builder.
.export_to_edge()
.to_backend([get_xnnpack_partitioner()])
.to_executorch()
.save_to_pte(f"{modelname}.pte")
)


if __name__ == "__main__":
Expand Down
Loading