Commit c393b2f

suo authored and pytorchmergebot committed
[export] require Module to be passed to export (pytorch#117528)
This PR changes torch.export to require an nn.Module as input, rather than accepting an arbitrary callable. The rationale is that several invariants of the ExportedProgram are ambiguous if the top-level object being traced is a function:

1. We "guarantee" that every call_function node has an `nn_module_stack` populated.
2. We offer ways to access the state_dict/parameters/buffers of the exported program.

We'd like torch.export to offer strong invariants—the value proposition of export is that you trade flexibility for stronger guarantees about your model.

An alternative design would be to implicitly convert the top-level function into a module, rather than require that the user provide one. I think that's reasonable (it's what we did in TorchScript), but in the spirit of being explicit (another design tenet of export) I avoid that here.

Differential Revision: [D52789321](https://our.internmc.facebook.com/intern/diff/D52789321/)

Pull Request resolved: pytorch#117528

Approved by: https://github.com/thiagocrepaldi, https://github.com/zhxchen17, https://github.com/avikchaudhuri, https://github.com/tugsbayasgalan
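For code that previously exported a bare function, the migration is to wrap the function body in a module's `forward`, as the test changes below do. A minimal sketch of that pattern (the `Wrapper` name is illustrative, not part of the API):

    import torch

    # Before this PR: torch.export.export(fn, (x,)) accepted a plain callable.
    # After: the first argument must be an nn.Module, so wrap the logic in forward().
    class Wrapper(torch.nn.Module):
        def forward(self, x):
            return x + x

    ep = torch.export.export(Wrapper(), (torch.zeros(4, 4),))
    # Module-based tracing keeps nn_module_stack populated on call_function nodes
    # and gives the ExportedProgram a well-defined state_dict/parameters/buffers.
    print(ep)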
1 parent 3ee092f commit c393b2f

File tree

4 files changed (+139, -88 lines)


test/export/test_export.py

Lines changed: 5 additions & 2 deletions
@@ -2876,8 +2876,11 @@ def test_lift_custom_obj(self):
 
         custom_obj = torch.classes._TorchScriptTesting._PickleTester([3, 4])
 
-        def f(x):
-            return x + x
+        class Foo(torch.nn.Module):
+            def forward(self, x):
+                return x + x
+
+        f = Foo()
 
         inputs = (torch.zeros(4, 4),)
         ep = export(f, inputs)

test/onnx/test_fx_to_onnx_with_onnxruntime.py

Lines changed: 104 additions & 73 deletions
@@ -83,12 +83,15 @@ def setUp(self):
         self.ort_version = onnxruntime.__version__
 
     def test_simple_function(self):
-        def func(x):
-            # TODO(justinchuby): Replicate torch's type casting policy
-            # in the exporter for type promotion support
-            y = x + 1.0
-            z = y.relu()
-            return (y, z)
+        class Foo(torch.nn.Module):
+            def forward(self, x):
+                # TODO(justinchuby): Replicate torch's type casting policy
+                # in the exporter for type promotion support
+                y = x + 1.0
+                z = y.relu()
+                return (y, z)
+
+        func = Foo()
 
         tensor_x = torch.randn(1, 1, 2, dtype=torch.float32)
 
@@ -118,10 +121,13 @@ def test_func_with_args_and_tensor_kwargs(self):
         # practice to set mutable default values.
         # `DynamoOptimizeExporter` applies a workaround by binding args and kwargs to
         # model signature and fill in the default values of unprovided optional arguments.
-        def func(x, b=torch.tensor(1.0)):
-            y = x + b
-            z = y.relu()
-            return (y, z)
+        class Foo(torch.nn.Module):
+            def forward(self, x, b=torch.tensor(1.0)):
+                y = x + b
+                z = y.relu()
+                return (y, z)
+
+        func = Foo()
 
         tensor_x = torch.randn(1, 2, 3, dtype=torch.float32)
 
@@ -140,21 +146,24 @@ def func(x, b=torch.tensor(1.0)):
         "sympy operation tests don't need dynamic shape"
     )
     def test_sympy_operatons_return_numeric(self):
-        def func(x, y):
-            # TODO: add boolean tests when SymBool is supported
-            # to infer types
-            return (
-                torch.tensor([operator.add(x.item(), y.item())]),
-                torch.tensor([operator.sub(x.item(), y.item())]),
-                torch.tensor([operator.mul(x.item(), y.item())]),
-                torch.tensor([operator.truediv(x.item(), y.item())]),
-                torch.tensor([operator.floordiv(x.item(), y.item())]),
-                torch.tensor([operator.pow(x.item(), y.item())]),
-                torch.tensor([operator.abs(x.item())]),
-                torch.tensor([operator.neg(x.item())]),
-                torch.tensor([math.ceil(x.item())]),
-                torch.tensor([math.floor(x.item())]),
-            )
+        class Foo(torch.nn.Module):
+            def forward(self, x, y):
+                # TODO: add boolean tests when SymBool is supported
+                # to infer types
+                return (
+                    torch.tensor([operator.add(x.item(), y.item())]),
+                    torch.tensor([operator.sub(x.item(), y.item())]),
+                    torch.tensor([operator.mul(x.item(), y.item())]),
+                    torch.tensor([operator.truediv(x.item(), y.item())]),
+                    torch.tensor([operator.floordiv(x.item(), y.item())]),
+                    torch.tensor([operator.pow(x.item(), y.item())]),
+                    torch.tensor([operator.abs(x.item())]),
+                    torch.tensor([operator.neg(x.item())]),
+                    torch.tensor([math.ceil(x.item())]),
+                    torch.tensor([math.floor(x.item())]),
+                )
+
+        func = Foo()
 
         x = torch.randn(1, dtype=torch.float32)
         y = torch.randn(1, dtype=torch.float32)
@@ -171,10 +180,13 @@ def func(x, y):
         reason="https://github.com/pytorch/pytorch/issues/99534",
     )
     def test_xfail_func_with_non_tensor_args(self):
-        def func(x, b=1.0):
-            y = x + b
-            z = y.relu()
-            return (y, z)
+        class Foo(torch.nn.Module):
+            def forward(self, x, b=1.0):
+                y = x + b
+                z = y.relu()
+                return (y, z)
+
+        func = Foo()
 
         tensor_x = torch.randn(1, 1, 2, dtype=torch.float32)
 
@@ -202,25 +214,29 @@ def func(x, b=1.0):
         torch.testing.assert_close(ref_output, torch.tensor(ort_output))
 
     def test_func_with_nested_input_structure(self):
-        def func(
-            x_dict: Dict[str, torch.Tensor],
-            y_tuple: Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]],
-            z_list: List[List[torch.Tensor]],
-        ):
-            if "a" in x_dict:
-                x = x_dict["a"]
-            elif "b" in x_dict:
-                x = x_dict["b"]
-            else:
-                x = torch.randn(3)
+        class Foo(torch.nn.Module):
+            def forward(
+                self,
+                x_dict: Dict[str, torch.Tensor],
+                y_tuple: Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]],
+                z_list: List[List[torch.Tensor]],
+            ):
+                if "a" in x_dict:
+                    x = x_dict["a"]
+                elif "b" in x_dict:
+                    x = x_dict["b"]
+                else:
+                    x = torch.randn(3)
 
-            y1, (y2, y3) = y_tuple
+                y1, (y2, y3) = y_tuple
 
-            z = x + y1 + y2 + y3
-            for z_sub_list in z_list:
-                z = z + torch.stack(z_sub_list).sum()
+                z = x + y1 + y2 + y3
+                for z_sub_list in z_list:
+                    z = z + torch.stack(z_sub_list).sum()
 
-            return z
+                return z
+
+        func = Foo()
 
         x_dict = {"a": torch.randn(3), "c": torch.randn(3)}
         y_tuple = (torch.randn(3), (torch.randn(3), torch.randn(3)))
@@ -233,14 +249,17 @@ def func(
         )
 
     def test_func_with_nested_output_structure(self):
-        def func(x, y, z):
-            x = x + y
-            y = y + z
-            z = x + y
-            out1 = (x, (y, z))
-            out2 = [[x, y], [y, z]]
-            out3 = {"z": z, "x": x}
-            return out1, out2, out3
+        class Foo(torch.nn.Module):
+            def forward(self, x, y, z):
+                x = x + y
+                y = y + z
+                z = x + y
+                out1 = (x, (y, z))
+                out2 = [[x, y], [y, z]]
+                out3 = {"z": z, "x": x}
+                return out1, out2, out3
+
+        func = Foo()
 
         x = torch.randn(3)
         y = torch.randn(3)
@@ -535,19 +554,22 @@ def forward(self, x):
 
     @pytorch_test_common.skipIfNoCuda
    def test__scaled_dot_product_flash_attention(self):
-        def func(x):
-            (
-                output,
-                _,
-                _,
-                _,
-                _,
-                _,
-                _,
-                _,
-                _,
-            ) = torch.ops.aten._scaled_dot_product_flash_attention(x, x, x)
-            return output
+        class Foo(torch.nn.Module):
+            def forward(self, x):
+                (
+                    output,
+                    _,
+                    _,
+                    _,
+                    _,
+                    _,
+                    _,
+                    _,
+                    _,
+                ) = torch.ops.aten._scaled_dot_product_flash_attention(x, x, x)
+                return output
+
+        func = Foo()
 
         x = torch.randn(1, 1, 1, 32, device=torch.device("cuda"))
         self.run_test_with_fx_to_onnx_exporter_and_onnx_runtime(func, (x,))
@@ -597,9 +619,12 @@ def forward(
         )
 
     def test_operator_with_data_dependent_output(self):
-        def func(x):
-            # Repro from llama. Emits `torch.ops.aten._local_scalar_dense`.
-            return x + torch.full(x.shape, torch.tensor(torch.finfo(x.dtype).min))
+        class Foo(torch.nn.Module):
+            def forward(self, x):
+                # Repro from llama. Emits `torch.ops.aten._local_scalar_dense`.
+                return x + torch.full(x.shape, torch.tensor(torch.finfo(x.dtype).min))
+
+        func = Foo()
 
         self.run_test_with_fx_to_onnx_exporter_and_onnx_runtime(
             func, (torch.randn(3, 4),)
@@ -610,8 +635,11 @@ def func(x):
         reason="https://github.com/pytorch/pytorch/issues/112622",
     )
     def test_operator_with_scalar_output(self):
-        def func(x, y):
-            return x.item() + y
+        class Foo(torch.nn.Module):
+            def forward(self, x, y):
+                return x.item() + y
+
+        func = Foo()
 
         self.run_test_with_fx_to_onnx_exporter_and_onnx_runtime(
             func, (torch.tensor([1]), torch.randn(3, 4))
@@ -622,8 +650,11 @@ def func(x, y):
         reason="https://github.com/pytorch/pytorch/issues/112622",
     )
     def test_operator_with_dynamic_output_shape(self):
-        def func(x):
-            return x.nonzero()
+        class Foo(torch.nn.Module):
+            def forward(self, x):
+                return x.nonzero()
+
+        func = Foo()
 
         self.run_test_with_fx_to_onnx_exporter_and_onnx_runtime(
             func, (torch.randn(3, 4),)

test/onnx/torch_export/test_torch_export_with_onnxruntime.py

Lines changed: 21 additions & 9 deletions
@@ -84,8 +84,11 @@ def forward(self, x):
         )
 
     def test_exported_program_with_specialized_input_during_tracing(self):
-        def f(x, y):
-            return x + y
+        class Foo(torch.nn.Module):
+            def forward(self, x, y):
+                return x + y
+
+        f = Foo()
 
         tensor_input = torch.ones(7, 5)
         dim0_x = torch.export.Dim("dim0_x", min=6)
@@ -131,7 +134,7 @@ def forward(self, x):
         # NOTE: If input is ExportedProgram, we need to specify dynamic_shapes
         # as a tuple.
         reexported_program = torch.export.export(
-            exported_program, (tensor_input,), dynamic_shapes=({0: dim0_x},)
+            exported_program.module(), (tensor_input,), dynamic_shapes=({0: dim0_x},)
         )
         reexported_onnx_program = torch.onnx.dynamo_export(
             reexported_program, tensor_input
@@ -145,8 +148,11 @@ def forward(self, x):
         )
 
     def test_onnx_program_supports_none_arg_name_in_dynamic(self):
-        def foo(a, b):
-            return a.sum() + b.sum()
+        class Foo(torch.nn.Module):
+            def forward(self, a, b):
+                return a.sum() + b.sum()
+
+        foo = Foo()
 
         dim = torch.export.Dim("dim")
         exported_program = torch.export.export(
@@ -165,8 +171,11 @@ def foo(a, b):
         )
 
     def test_onnx_program_suppors_non_arg_name_with_kwarg(self):
-        def foo(a, b, kw1, kw2):
-            return a.sum() + b.sum() + kw1.sum() - kw2.sum()
+        class Foo(torch.nn.Module):
+            def forward(self, a, b, kw1, kw2):
+                return a.sum() + b.sum() + kw1.sum() - kw2.sum()
+
+        foo = Foo()
 
         dim = torch.export.Dim("dim")
         dim_for_kw1 = torch.export.Dim("dim_for_kw1")
@@ -238,8 +247,11 @@ def forward(self, x, b):
         )
 
     def test_onnx_program_supports_non_arg_name_with_container_type(self):
-        def foo(a, b):
-            return a[0].sum() + a[1].sum() + b.sum()
+        class Foo(torch.nn.Module):
+            def forward(self, a, b):
+                return a[0].sum() + a[1].sum() + b.sum()
+
+        foo = Foo()
 
         inp_a = (torch.randn(4, 4), torch.randn(4, 4))
         inp_b = torch.randn(4, 4)
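Note that the `exported_program.module()` hunk above follows from the same requirement: an ExportedProgram is no longer a valid first argument to export, so re-export goes through the module it produces. A small sketch of that pattern, assuming a trivial single-tensor model (`M` is illustrative, not taken from the diff):

    import torch

    class M(torch.nn.Module):
        def forward(self, x):
            return x.relu()

    x = torch.randn(3, 4)
    ep = torch.export.export(M(), (x,))

    # Re-export by passing the unlifted nn.Module, not the ExportedProgram itself.
    reexported = torch.export.export(ep.module(), (x,))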

torch/export/__init__.py

Lines changed: 9 additions & 4 deletions
@@ -74,7 +74,7 @@
 
 
 def export(
-    f: Callable,
+    mod: torch.nn.Module,
     args: Tuple[Any, ...],
     kwargs: Optional[Dict[str, Any]] = None,
    *,
@@ -124,7 +124,7 @@ def export(
     ``dynamic_shapes`` argument to your :func:`export` call.
 
     Args:
-        f: The callable to trace.
+        mod: We will trace the forward method of this module.
 
         args: Example positional inputs.
 
@@ -179,6 +179,11 @@ def export(
     from ._trace import _export
     from .dynamic_shapes import _process_dynamic_shapes
 
+    if not isinstance(mod, torch.nn.Module):
+        raise ValueError(
+            f"Expected `mod` to be an instance of `torch.nn.Module`, got {type(mod)}."
+        )
+
     if constraints is not None:
         warnings.warn(
             "Using `constraints` to specify dynamic shapes for export is DEPRECATED "
@@ -188,10 +193,10 @@ def export(
             stacklevel=2,
         )
     else:
-        constraints = _process_dynamic_shapes(f, args, kwargs, dynamic_shapes)
+        constraints = _process_dynamic_shapes(mod, args, kwargs, dynamic_shapes)
 
     return _export(
-        f,
+        mod,
         args,
         kwargs,
         constraints,
