@@ -13,6 +13,11 @@
 from torchtune.models.phi3._model_builders import lora_phi3_mini
 
 
 class TrainingModule(torch.nn.Module):
+    """
+    The model being trained should return the loss from forward(). This
+    class wraps the actual phi3-mini model and calculates an arbitrary
+    loss with its forward() output.
+    """
     def __init__(self, model, loss):
         super().__init__()
         self.model = model
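As context for this hunk: the new docstring describes a common pattern for training export, where the module's forward() ends in a scalar loss. Here is a minimal, self-contained sketch of that pattern with a toy linear model (ToyTrainingModule and its shapes are illustrative stand-ins, not part of this change):

```python
import torch


class ToyTrainingModule(torch.nn.Module):
    """Toy stand-in for TrainingModule: forward() returns the loss."""

    def __init__(self, model, loss):
        super().__init__()
        self.model = model
        self.loss = loss

    def forward(self, input):
        # (batch, vocab) logits from the wrapped model.
        logits = self.model(input)
        # Placeholder all-zeros target, mirroring the example's dummy target.
        target = torch.zeros(logits.shape[0], dtype=torch.long)
        return self.loss(logits, target)


toy = ToyTrainingModule(torch.nn.Linear(8, 32), torch.nn.CrossEntropyLoss())
print(toy(torch.randn(1, 8)))  # a scalar loss tensor
```

Returning the loss from forward() matters because the training export below differentiates the graph's output; a module that returned raw logits would leave nothing to backpropagate from.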
@@ -21,14 +26,14 @@ def __init__(self, model, loss):
     def forward(self, input):
         # Output is of the shape (seq_len, vocab_size).
         output = self.model(input)
-        # Vocab size of 32064 is taken from the phi3 model itself.
+        # 32064 is the vocab size of the phi3-mini model.
         target = zeros((1, 32064), dtype=long)
         return self.loss(output, target)
 
 @no_grad()
-def export_mini_phi3_lora(model) -> None:
+def export_phi3_mini_lora(model) -> None:
     """
-    Export the example mini-phi3 with LoRA model to executorch.
+    Export the example phi3-mini with LoRA model to executorch.
 
     Note: need to use the SDPBackend's custom kernel for sdpa (scalable
     dot product attention) because the default sdpa kernel used in the
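The docstring's note is truncated by the hunk boundary, but it concerns pinning scaled dot product attention to a single backend so the traced graph stays exportable. A minimal sketch of that pattern, assuming PyTorch 2.3+'s torch.nn.attention.sdpa_kernel context manager and the math backend (the exact backend choice in this example is not visible in the hunk):

```python
import torch
from torch.export import export
from torch.nn.attention import SDPBackend, sdpa_kernel


def export_with_math_sdpa(model: torch.nn.Module, example_input: torch.Tensor):
    # Pin sdpa to the decomposed math kernel while tracing, so torch.export
    # does not capture a runtime backend-selection path.
    with sdpa_kernel([SDPBackend.MATH]):
        return export(model, (example_input,))
```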
@@ -50,15 +55,15 @@ def export_mini_phi3_lora(model) -> None:
     executorch_program = edge_program.to_executorch()
 
     # 4. Save the compiled .pte program.
-    print("Saving to mini_phi3_lora.pte")
-    with open("mini_phi3_lora.pte", "wb") as file:
+    print("Saving to phi3_mini_lora.pte")
+    with open("phi3_mini_lora.pte", "wb") as file:
         file.write(executorch_program.buffer)
 
     print("Done.")
 
-def export_mini_phi3_lora_training(model) -> None:
+def export_phi3_mini_lora_training(model) -> None:
     """
-    Export the example mini-phi3 with LoRA model to executorch for training, only.
+    Export the example phi3-mini with LoRA model to executorch for training only.
     """
     print("Exporting mini phi3 with LoRA for training")
     # 1. torch.export: Defines the program with the ATen operator set.
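The step-1 code itself falls between these hunks, so it is not shown. For orientation, a hedged sketch of how a joint forward-plus-backward graph is typically produced for training export, assuming the private torch.export.experimental._export_forward_backward helper (an experimental API that may change; the actual step 1 in this file may differ):

```python
from torch import int64, zeros
from torch.export import export
from torch.export.experimental import _export_forward_backward


def export_joint_graph(model):
    # `model` is expected to return a scalar loss, like TrainingModule above.
    example_args = (zeros((1, 10), dtype=int64),)
    exported = export(model, example_args)
    # Append the backward pass, yielding the joint graph lowered below.
    return _export_forward_backward(exported)
```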
@@ -73,19 +78,21 @@ def export_mini_phi3_lora_training(model) -> None:
     print("Lowering to edge dialect")
     edge_program = to_edge(joint_graph)
 
+    print(edge_program._edge_programs["forward"].graph_module)
+
     # 3. to_executorch: Convert the graph to an ExecuTorch program.
     print("Exporting to executorch")
     executorch_program = edge_program.to_executorch()
 
     # 4. Save the compiled .pte program.
-    print("Saving to mini_phi3_lora_training.pte")
-    with open("mini_phi3_lora_training.pte", "wb") as file:
+    print("Saving to phi3_mini_lora_training.pte")
+    with open("phi3_mini_lora_training.pte", "wb") as file:
         file.write(executorch_program.buffer)
 
     print("Done.")
 
 
-def run_mini_phi3_lora(model) -> Tensor:
+def run_phi3_mini_lora(model) -> Tensor:
     """Run the model and return the result."""
     # Input shape: (batch_size, seq_len).
     args = zeros((1, 10), dtype=int64)
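Once a .pte file has been saved by the exporters above, it can be executed outside this script. A hedged sketch of loading and running the inference program, assuming ExecuTorch's Python bindings are built and exposed at this module path (the path has moved between ExecuTorch releases):

```python
from torch import int64, zeros

from executorch.extension.pybindings.portable_lib import _load_for_executorch

program = _load_for_executorch("phi3_mini_lora.pte")
# Same (batch_size, seq_len) input shape the example traces with.
outputs = program.forward((zeros((1, 10), dtype=int64),))
print(outputs[0].shape)
```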
@@ -103,11 +110,11 @@ def main() -> None:
     )
 
     # Export for inference.
-    export_mini_phi3_lora(lora_model)
+    export_phi3_mini_lora(lora_model)
 
     # Export for training.
     lora_training_model = TrainingModule(lora_model, torch.nn.CrossEntropyLoss())
-    export_mini_phi3_lora_training(lora_training_model)
+    export_phi3_mini_lora_training(lora_training_model)
 
 
 if __name__ == "__main__":