@@ -89,9 +89,10 @@ def __init__(
        dynamic_shapes: Optional[Any] = None,
    ):
        self.model = model
-        self.exported_program: Optional[ExportedProgram] = None
-        # Self.exported_program's pre-autograd graph module, for running
-        # transform passes on the graph prior to torch.export().
+        # Note: treat this as the source of truth for the result of
+        # torch.export'ing a model. If the overall ExportedProgram is needed,
+        # make sure to re-export this graph module to persist any changes. See
+        # https://github.com/pytorch/pytorch/blob/main/torch/export/exported_program.py#L921
        self.pre_autograd_graph_module: Optional[torch.nn.Module] = None
        self.modelname = modelname
        self.max_seq_len = max_seq_len
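The comment added above makes pre_autograd_graph_module the working copy of the export. As a rough illustration of the re-export pattern it describes, here is a minimal sketch with an invented toy model (not code from this PR):

    import torch
    from torch.export import export_for_training

    class Toy(torch.nn.Module):
        def forward(self, x):
            return torch.nn.functional.relu(x + 1)

    example_inputs = (torch.randn(2, 4),)

    # First export: keep the pre-autograd graph module as the working copy.
    graph_module = export_for_training(Toy(), example_inputs).module()

    # ... run transform passes that mutate graph_module in place ...

    # To obtain an ExportedProgram that reflects those edits, export the graph
    # module again rather than reusing the original ExportedProgram.
    exported_program = export_for_training(graph_module, example_inputs)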
@@ -186,21 +187,7 @@ def _get_edge_config(self) -> EdgeCompileConfig:
        )
        return edge_config

-    def export(self, module: Optional[torch.nn.Module] = None) -> "LLMEdgeManager":
-        """
-        Exports the model pre-autograd. This is not a full export, since it uses
-        torch.export_for_training() to keep autograd-safe ops from getting decomposed.
-        The full torch.export() if called later on during to_edge() or
-        to_edge_transform_and_lower().
-
-        The optional `module` argument is included so that the user can re-export
-        an already-exported module's ExportedProgram's graph module, to persiste
-        the changes into a new ExportedProgram.
-
-        Args:
-            module (Optional[torch.nn.Module]): module to export.
-
-        """
+    def _export(self, module: Optional[torch.nn.Module] = None) -> ExportedProgram:
        dynamic_shape = self._get_dynamic_shape()
        # 1. torch.nn.attention.sdpa_kernel([SDPBackend.MATH]) is for bypassing the dynamo error when tracing
        # 2. torch.no_grad() is for getting rid of the dropout (not sure why training ops will show up)
@@ -237,14 +224,22 @@ def export(self, module: Optional[torch.nn.Module] = None) -> "LLMEdgeManager":
                kwargs=self.example_kwarg_inputs,
                dynamic_shapes=dynamic_shape,
            )
-        self.exported_program = exported_module
-        # Need to store the graph module to record transformation passes.
-        # Persisting those changes back to the ExportedProgram will require
-        # an additional export().
-        self.pre_autograd_graph_module = exported_module.module()
-        if hasattr(self.args, "export_only") and self.args.export_only:
-            torch.export.save(exported_module, self.args.output_name)
+        return exported_module

+    def export(self) -> "LLMEdgeManager":
+        """
+        Exports the model pre-autograd. This is not a full export, since it uses
+        torch.export_for_training() to keep autograd-safe ops from getting decomposed.
+        The full torch.export() is called later on during to_edge() or
+        to_edge_transform_and_lower().
+        """
+        exported_module = self._export()
+        # Need to store the graph module to record transformation passes.
+        # Persisting those changes back to an ExportedProgram will require
+        # an additional export().
+        self.pre_autograd_graph_module = exported_module.module()
+        if hasattr(self.args, "export_only") and self.args.export_only:
+            torch.export.save(exported_module, self.args.output_name)
        return self

    def run_canonical_optimizations(self):
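To summarize the split introduced in this hunk: the private _export() now only produces an ExportedProgram, while the public export() caches its pre-autograd graph module and returns the manager for chaining. A hedged sketch of that shape, where the stand-in class and its fields are invented for illustration and only the two-method split mirrors the diff:

    from typing import Optional

    import torch
    from torch.export import ExportedProgram, export_for_training

    class TinyManager:
        def __init__(self, model: torch.nn.Module, example_inputs):
            self.model = model
            self.example_inputs = example_inputs
            self.pre_autograd_graph_module: Optional[torch.nn.Module] = None

        def _export(self, module: Optional[torch.nn.Module] = None) -> ExportedProgram:
            # Export either the original model or an already-transformed
            # graph module that needs to be re-exported.
            target = module if module is not None else self.model
            return export_for_training(target, self.example_inputs)

        def export(self) -> "TinyManager":
            # The public entry point only caches the pre-autograd graph module;
            # a fresh ExportedProgram is produced on demand via _export().
            self.pre_autograd_graph_module = self._export().module()
            return self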
@@ -403,7 +398,7 @@ def export_to_edge(self) -> "LLMEdgeManager":
        # 1. torch.nn.attention.sdpa_kernel([SDPBackend.MATH]) is for bypassing the dynamo error when tracing
        # 2. torch.no_grad() is for getting rid of the dropout (not sure why training ops will show up)
        with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
-            if self.exported_program is None:
+            if self.pre_autograd_graph_module is None:
                # Run export() if it didn't run
                self.export()

@@ -415,12 +410,9 @@ def export_to_edge(self) -> "LLMEdgeManager":
                    return_value=False,
                )

-            # Prior to export, persist the changes to the pre autograd
-            # graph module back to the source-of-truth ExportedProgram.
-            self.export(self.pre_autograd_graph_module)
            with override_export_behaviour:
                self.edge_manager = export_to_edge(
-                    self.exported_program.module(),  # pyre-fixme[6]
+                    self.pre_autograd_graph_module,  # pyre-fixme[6]
                    self.example_inputs,
                    example_kwarg_inputs=self.example_kwarg_inputs,
                    dynamic_shapes=dynamic_shape,
@@ -466,13 +458,12 @@ def to_edge_transform_and_lower(
        if partitioners is None:
            logging.info("No partitioner provided, skipping backend lowering...")

-        # Prior to export, persist the changes to the pre autograd
-        # graph module back to the source-of-truth ExportedProgram.
-        self.export(self.pre_autograd_graph_module)
+        # Need to construct ExportedProgram with the new transformed graph module.
+        exported_module = self._export(self.pre_autograd_graph_module)

        edge_config = self._get_edge_config()
        self.edge_manager = to_edge_transform_and_lower(
-            self.exported_program,
+            exported_module,
            partitioner=partitioners,
            compile_config=edge_config,
            constant_methods=self.metadata,
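The same re-export-then-lower sequence, pulled out of the class for clarity: a minimal end-to-end sketch with a toy model, where the executorch import path and the default EdgeCompileConfig are assumptions standing in for the manager's _get_edge_config() and surroundings, and partitioner=None exercises the "skip backend lowering" path noted in the hunk above:

    import torch
    from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower
    from torch.export import export_for_training

    class Toy(torch.nn.Module):
        def forward(self, x):
            return x * 2

    example_inputs = (torch.randn(2, 4),)

    # Pre-autograd export; graph passes would edit this module in place.
    graph_module = export_for_training(Toy(), example_inputs).module()

    # Re-export the (possibly transformed) graph module, then lower it.
    exported_program = export_for_training(graph_module, example_inputs)
    edge_manager = to_edge_transform_and_lower(
        exported_program,
        partitioner=None,  # no backend lowering in this sketch
        compile_config=EdgeCompileConfig(),
    )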