@@ -8,10 +8,10 @@
 from typing import Optional
 
 import torch
+import torch._inductor
 import torch.nn as nn
 
 from torch.export import Dim
-import torch._inductor
 
 from torchchat.cli.builder import (
     _initialize_model,
@@ -68,20 +68,24 @@ def export_for_server(
 
     with torch.nn.attention.sdpa_kernel([torch.nn.attention.SDPBackend.MATH]):
         metadata = {}  # TODO: put more metadata here
-        options = {"aot_inductor.package": package, "aot_inductor.metadata": metadata}
+        options = {"aot_inductor.metadata": metadata}
         if not package:
             options = {"aot_inductor.output_path": output_path}
 
-        path = torch._export.aot_compile(
+        ep = torch.export.export(
             model,
             example_inputs,
             dynamic_shapes=dynamic_shapes,
-            options=options,
         )
 
         if package:
-            from torch._inductor.package import package_aoti
-            path = package_aoti(output_path, path)
+            path = torch._inductor.aoti_compile_and_package(
+                ep, package_path=output_path, inductor_configs=options
+            )
+        else:
+            path = torch._inductor.aot_compile(
+                ep.module(), example_inputs, options=options
+            )
 
     print(f"The generated packaged model can be found at: {path}")
     return path
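
For context, the hunk above swaps the one-shot torch._export.aot_compile call for a two-step flow: torch.export.export first captures the model as an ExportedProgram, which is then either bundled into a self-contained .pt2 artifact via torch._inductor.aoti_compile_and_package or lowered to a bare shared library via torch._inductor.aot_compile. Below is a minimal sketch of that flow on a toy model; the TinyModel class, the output paths, and the aoti_load_package round-trip are illustrative assumptions rather than part of this change, and it presumes a PyTorch build recent enough that aoti_compile_and_package accepts just the ExportedProgram, as the new code here does.

import torch
import torch._inductor


class TinyModel(torch.nn.Module):
    # Illustrative stand-in for the torchchat model being exported.
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.relu(x @ x.T)


model = TinyModel().eval()
example_inputs = (torch.randn(4, 8),)

# Step 1: capture the model as an ExportedProgram (dynamic_shapes omitted here).
ep = torch.export.export(model, example_inputs)

# Step 2a (package=True): compile with AOTInductor and bundle the result,
# weights included, into a single .pt2 package.
pt2_path = torch._inductor.aoti_compile_and_package(
    ep, package_path="/tmp/tiny_model.pt2"  # hypothetical output location
)

# Step 2b (package=False): lower the captured graph to a bare shared library
# instead, mirroring the else-branch above.
so_path = torch._inductor.aot_compile(
    ep.module(),
    example_inputs,
    options={"aot_inductor.output_path": "/tmp/tiny_model.so"},
)

# A .pt2 package can be loaded back and invoked like a plain callable.
runner = torch._inductor.aoti_load_package(pt2_path)
print(runner(*example_inputs).shape)

One upside of the packaged path is that the .pt2 file carries the compiled kernels and weights together, so downstream consumers load a single artifact instead of pairing a shared library with a separate checkpoint.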
@@ -106,9 +110,6 @@ def export_for_server(
     from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
         XnnpackDynamicallyQuantizedPartitioner,
     )
-    from executorch.backends.xnnpack._passes.convert_to_linear import (
-        ConvertToLinearPass,
-    )
     from executorch.exir import EdgeProgramManager, to_edge
 
     from executorch.exir.capture._config import (