Skip to content

Commit 5316e69

Browse files
lucylq authored and facebook-github-bot committed
Introduce write_to_file api (#2307)
Summary: Pull Request resolved: #2307 Update callsites that save to file to use write_to_file api instead of .buffer bypass-github-export-checks Reviewed By: dbort Differential Revision: D54526788 fbshipit-source-id: 6b4975f3fd7fd6c74b97a486a2f58aa62a7b2a71
1 parent e44d5b2 commit 5316e69

File tree

11 files changed

+44
-17
lines changed

11 files changed

+44
-17
lines changed

examples/apple/mps/scripts/mps_example.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,13 +143,10 @@
143143
if not args.use_fp16:
144144
extension = "fp32"
145145
model_name = f"{model_name}_{extension}"
146-
program_buffer = bundled_program_buffer
147-
else:
148-
program_buffer = executorch_program.buffer
149146

150147
if args.generate_etrecord:
151148
etrecord_path = "etrecord.bin"
152149
logging.info("generating etrecord.bin")
153150
generate_etrecord(etrecord_path, edge_program_manager_copy, executorch_program)
154151

155-
save_pte_program(program_buffer, model_name)
152+
save_pte_program(executorch_program, model_name)

examples/models/llama2/builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,4 +343,4 @@ def save_to_pte(self, output_name: str) -> None:
343343
output_name (Optional[str]): The name of the .pte file.
344344
"""
345345
assert output_name, "Need a valid output name"
346-
save_pte_program(self.export_program.buffer, output_name, self.output_dir)
346+
save_pte_program(self.export_program, output_name, self.output_dir)

examples/portable/custom_ops/custom_ops_1.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def main():
5151
(input,),
5252
edge_compile_config=EdgeCompileConfig(_check_ir_validity=False),
5353
)
54-
save_pte_program(prog.buffer, model_name)
54+
save_pte_program(prog, model_name)
5555

5656

5757
if __name__ == "__main__":

examples/portable/custom_ops/custom_ops_2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def main():
3232
(input,),
3333
edge_compile_config=EdgeCompileConfig(_check_ir_validity=False),
3434
)
35-
save_pte_program(prog.buffer, model_name)
35+
save_pte_program(prog, model_name)
3636

3737

3838
if __name__ == "__main__":

examples/portable/scripts/export.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def main() -> None:
7171
dynamic_shapes=dynamic_shapes,
7272
backend_config=backend_config,
7373
)
74-
save_pte_program(prog.buffer, args.model_name, args.output_dir)
74+
save_pte_program(prog, args.model_name, args.output_dir)
7575

7676

7777
if __name__ == "__main__":

examples/portable/utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,13 @@ def export_to_exec_prog(
9898
return exec_prog
9999

100100

101-
def save_pte_program(buffer: bytes, model_name: str, output_dir: str = "") -> None:
101+
def save_pte_program(
102+
prog: ExecutorchProgramManager, model_name: str, output_dir: str = ""
103+
) -> None:
102104
filename = os.path.join(output_dir, f"{model_name}.pte")
103105
try:
104106
with open(filename, "wb") as file:
105-
file.write(buffer)
107+
prog.write_to_file(file)
106108
logging.info(f"Saved exported program to {filename}")
107109
except Exception as e:
108110
logging.error(f"Error while saving to {filename}: {e}")

examples/qualcomm/scripts/export_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,4 +74,4 @@
7474
executorch_program = delegated_program.to_executorch(
7575
config=ExecutorchBackendConfig(extract_constant_segment=False)
7676
)
77-
save_pte_program(executorch_program.buffer, args.model_name)
77+
save_pte_program(executorch_program, args.model_name)

examples/xnnpack/aot_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,4 +111,4 @@
111111

112112
quant_tag = "q8" if args.quantize else "fp32"
113113
model_name = f"{args.model_name}_xnnpack_{quant_tag}"
114-
save_pte_program(exec_prog.buffer, model_name, args.output_dir)
114+
save_pte_program(exec_prog, model_name, args.output_dir)

examples/xnnpack/quantization/example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def main() -> None:
193193
prog = edge_m.to_executorch(
194194
config=ExecutorchBackendConfig(extract_constant_segment=False)
195195
)
196-
save_pte_program(prog.buffer, f"{args.model_name}_quantized")
196+
save_pte_program(prog, f"{args.model_name}_quantized")
197197
end = time.perf_counter()
198198
logging.info(f"Save time: {end - start}s")
199199
logging.info("finished")

examples/xtensa/aot/export_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,4 +90,4 @@ def forward(self, x: torch.Tensor):
9090
logging.info(f"Final exported graph:\n{exec_prog.exported_program().graph}")
9191

9292
# Save the program as XtensaDemoModel.pte
93-
save_pte_program(exec_prog.buffer, "XtensaDemoModel")
93+
save_pte_program(exec_prog, "XtensaDemoModel")

exir/program/_program.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# LICENSE file in the root directory of this source tree.
66

77
import copy
8+
import io
89
import logging
910
from typing import Any, Dict, List, Optional, Sequence, Set, Union
1011

@@ -437,7 +438,8 @@ def buffer(self) -> bytes:
437438
"""Returns the serialized ExecuTorch binary as a byte string.
438439
439440
Note that the call to `buffer` may allocate a very large amount of
440-
contiguous memory, depending on the model size.
441+
contiguous memory, depending on the model size. If writing to a file,
442+
use `write_to_file` which won't incur additional copies.
441443
"""
442444
# TODO(T181494963): update pybinding to remove buffer cache, which can consume large
443445
# amounts of memory longer than necessary.
@@ -478,6 +480,14 @@ def dump_graph_module(self) -> torch.fx.GraphModule:
478480
def dump_exported_program(self) -> ExportedProgram:
479481
return self.exported_program
480482

483+
def write_to_file(self, open_file: io.BufferedIOBase) -> None:
484+
"""
485+
Writes the serialized ExecuTorch binary to the file at `open_file`. Prefer to use this over
486+
`buffer`, as it writes to file without copying into a contiguous block of memory first,
487+
reducing the peak memory usage.
488+
"""
489+
self._get_pte_data().write_to_file(open_file)
490+
481491

482492
def _get_aten_to_edge_passes(config: EdgeCompileConfig):
483493
# TODO: the last two passes for aten_to_edge need to be eliminated_dead_code -> debug_handle_generator. After enable
@@ -769,7 +779,8 @@ def buffer(self) -> bytes:
769779
"""Returns the serialized ExecuTorch binary as a byte string.
770780
771781
Note that the call to `buffer` may allocate a very large amount of
772-
contiguous memory, depending on the model size.
782+
contiguous memory, depending on the model size. If writing to a file,
783+
use `write_to_file` which won't incur additional copies.
773784
"""
774785
# TODO(T181494963): update pybinding to remove buffer cache, which can consume large
775786
# amounts of memory longer than necessary.
@@ -800,6 +811,14 @@ def dump_graph_module(self) -> torch.fx.GraphModule:
800811
def get_multi_method_graph_module(self) -> "MultiMethodExirExportedProgram":
801812
return self._executorch_dialect_ir_program
802813

814+
def write_to_file(self, open_file: io.BufferedIOBase) -> None:
815+
"""
816+
Writes the serialized ExecuTorch binary to the file at `open_file`. Prefer to use this over
817+
`buffer`, as it writes to file without copying into a contiguous block of memory first,
818+
reducing the peak memory usage.
819+
"""
820+
self._get_pte_data().write_to_file(open_file)
821+
803822

804823
# TODO(T152006915): Merge this into to_executorch and then delete it.
805824
def multi_method_program_to_executorch(
@@ -1210,10 +1229,19 @@ def buffer(self) -> bytes:
12101229
"""Returns the serialized ExecuTorch binary as a byte string.
12111230
12121231
Note that the call to `buffer` may allocate a very large amount of
1213-
contiguous memory, depending on the model size.
1232+
contiguous memory, depending on the model size. If writing to a file,
1233+
use `write_to_file` which won't incur additional copies.
12141234
"""
12151235
# TODO(T181494963): update pybinding to remove buffer cache, which can consume large
12161236
# amounts of memory longer than necessary.
12171237
if self._buffer is None:
12181238
self._buffer = bytes(self._pte_data)
12191239
return self._buffer
1240+
1241+
def write_to_file(self, open_file: io.BufferedIOBase) -> None:
1242+
"""
1243+
Writes the serialized ExecuTorch binary to the file at `open_file`. Prefer to use this over
1244+
`buffer`, as it writes to file without copying into a contiguous block of memory first,
1245+
reducing the peak memory usage.
1246+
"""
1247+
self._pte_data.write_to_file(open_file)

0 commit comments

Comments
 (0)