Commit b77fd57

JacobSzwejbka authored and facebook-github-bot committed

emit programs with mutable buffers (#2233)
Summary: Meaningful changes to the emitter logic here. Previously we ignored the tensor spec passed in, tried to decide whether the placeholder was a constant, and, if it was, created a new spec from that constant's actual value. That drops metadata from the input spec, which is not great. Now we instead look up the storage of the concrete tensor and hook it up to the existing spec. This change also adds logic to separate out the behavior for mutable buffers specifically. While working on this I discovered a bug: memory planning reserves space for parameters and constant buffers when it is told to allocate space for inputs, which is really bad. One big assumption this diff makes is that a mutated buffer does not have a meaningful initial state, so a warning is emitted about this in the short term; long term we will handle initial state properly.

bypass-github-export-checks

Reviewed By: tarun292

Differential Revision: D53713544
1 parent 862f755 commit b77fd57
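For context, a minimal sketch of the kind of stateful module this commit lets the emitter handle. The Counter module here is hypothetical; the export/to_edge/to_executorch calls mirror the new test added below.

    import torch
    from torch.export import export
    from executorch.exir import to_edge

    class Counter(torch.nn.Module):  # hypothetical example module
        def __init__(self) -> None:
            super().__init__()
            # Initial state is assumed meaningless; only shape and dtype are serialized.
            self.register_buffer("state", torch.zeros(1))

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            y = x + self.state
            self.state.add_(1)  # in-place buffer mutation, previously rejected at emit time
            return y

    # Before this change, emit_program raised ExportError ("Buffers cannot be
    # modified in executorch."); now it emits the plan and warns instead.
    program = to_edge(export(Counter(), (torch.zeros(1),))).to_executorch()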

File tree

4 files changed: +140 −36 lines changed

exir/emit/_emit_program.py

Lines changed: 38 additions & 9 deletions
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 # pyre-strict
+import copy
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Union
 
@@ -32,7 +33,8 @@
 )
 from executorch.exir.tensor import layout_enum, scalar_type_enum
 from executorch.exir.version import EXECUTORCH_SCHEMA_VERSION
-from torch.export.exported_program import ExportedProgram
+from torch.export.exported_program import ExportedProgram, OutputKind
+from torch.utils import _pytree as pytree
 
 
 def _emit_prim_getters(prim_getters: Dict[str, Any]) -> List[ExecutionPlan]:
@@ -122,6 +124,36 @@ class EmitterOutput:
     ]
 
 
+def _remove_non_user_outputs(exported_program: ExportedProgram) -> torch.fx.GraphModule:
+    gm = copy.deepcopy(exported_program.graph_module)
+    output_node = None
+    for node in gm.graph.nodes:
+        if node.op == "output":
+            output_node = node
+    assert output_node is not None
+
+    mutated_outputs: List[Optional[str]] = [
+        out_spec.target if out_spec.kind in (OutputKind.BUFFER_MUTATION,) else None
+        for out_spec in exported_program.graph_signature.output_specs
+    ]
+    outputs = pytree.tree_flatten(output_node.args)[0]
+
+    user_output_nodes = []
+    for return_node, mutated_node_name in zip(outputs, mutated_outputs):
+        if mutated_node_name is None:
+            user_output_nodes.append(return_node)
+            continue
+
+    with gm.graph.inserting_before(output_node):
+        # Only return user outputs
+        new_output = gm.graph.output(tuple(user_output_nodes))
+        new_output.meta = output_node.meta.copy()
+        output_node.replace_all_uses_with(new_output)
+        gm.graph.erase_node(output_node)
+
+    return gm
+
+
 def emit_program(
     methods: Union[ExportedProgram, Dict[str, ExportedProgram]],
     emit_stacktrace: bool = False,
@@ -163,13 +195,6 @@ def emit_program(
 
     # emit each entry point in order according to name.
     for name, exported_program in sorted(methods.items()):
-        if (
-            exported_program.graph_signature.buffers_to_mutate
-        ):  # see if we are mutating any state
-            raise ExportError(
-                ExportErrorType.INVALID_INPUT_TYPE,
-                "Buffers cannot be modified in executorch.",
-            )
         # create empty state
         emitter_state = _EmitterState(
             values=[],
@@ -180,7 +205,11 @@ def emit_program(
             emit_stacktrace=emit_stacktrace,
         )
 
-        emitter = _TopLevelEmitter(name, exported_program, program_state, emitter_state)
+        gm = _remove_non_user_outputs(exported_program)
+
+        emitter = _TopLevelEmitter(
+            name, exported_program, gm, program_state, emitter_state
+        )
 
         emitter.run()
         plans.append(emitter.plan())
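A note on `_remove_non_user_outputs`: `torch.export` records buffer mutations as extra entries in the graph's output node, tagged in `graph_signature.output_specs`, positionally aligned with the graph outputs. The helper rebuilds the output node with only the user outputs, so those mutation writes do not leak into the emitted method's return signature. A small sketch of what the signature looks like for the hypothetical Counter module above:

    from torch.export import export
    from torch.export.exported_program import OutputKind

    ep = export(Counter(), (torch.zeros(1),))
    for out_spec in ep.graph_signature.output_specs:
        # Expect something like:
        #   OutputKind.BUFFER_MUTATION, target='state'  (filtered out by the helper)
        #   OutputKind.USER_OUTPUT, target=None         (kept)
        print(out_spec.kind, out_spec.target)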

exir/emit/_emitter.py

Lines changed: 60 additions & 25 deletions
@@ -30,6 +30,7 @@
 # pyre-strict
 import ctypes
 import hashlib
+import warnings
 import operator
 import typing
 from dataclasses import dataclass, field
@@ -1266,15 +1267,17 @@ def __init__(
         self,
         name: str,
         exported_program: ExportedProgram,
+        graph_module: torch.fx.GraphModule,
         program_state: _ProgramState,
         emitter_state: _EmitterState,
     ) -> None:
-        super().__init__(exported_program.graph_module, emitter_state, program_state)
+        super().__init__(graph_module, emitter_state, program_state)
         self.name = name
         self.exported_program = exported_program
 
         self.inputs: List[int] = []
         self.outputs: List[int] = []
+        self.given_mutable_buffer_warning = False
 
     def create_container_str(spec: Optional[pytree.TreeSpec]) -> str:
         if spec is None:
@@ -1302,40 +1305,57 @@ def placeholder(
         https://pytorch.org/docs/stable/fx.html#torch.fx.Graph.placeholder
         """
         spec = self.node.meta["spec"]
-        const_tensor = False
-        if isinstance(target, str) and (
-            target in self.exported_program.graph_signature.inputs_to_parameters
-            or target in self.exported_program.graph_signature.inputs_to_buffers
-            or target
-            in self.exported_program.graph_signature.inputs_to_lifted_tensor_constants
-        ):
-            if (
+        is_user_input = True
+
+        if isinstance(target, str) and isinstance(spec, TensorSpec):
+            # Find the fully qualified name
+            fqn = None
+            is_mutable_buffer = False
+            if target in self.exported_program.graph_signature.inputs_to_parameters:
+                fqn = self.exported_program.graph_signature.inputs_to_parameters[target]
+
+            elif target in self.exported_program.graph_signature.inputs_to_buffers:
+                fqn = self.exported_program.graph_signature.inputs_to_buffers[target]
+
+                # if the buffer is mutated then record that
+                if (
+                    fqn
+                    in self.exported_program.graph_signature.buffers_to_mutate.values()
+                ):
+                    is_mutable_buffer = True
+                    if not self.given_mutable_buffer_warning:
+                        warnings.warn(
+                            "Mutation on a buffer in the model is detected. ExecuTorch assumes "
+                            "buffers that are mutated in the graph have a meaningless initial state, "
+                            "only the shape and dtype will be serialized.",
+                            UserWarning,
+                            stacklevel=1,
+                        )
+                        self.mutable_buffer_warning_count = True
+
+            elif (
                 target
                 in self.exported_program.graph_signature.inputs_to_lifted_tensor_constants
             ):
                 fqn = self.exported_program.graph_signature.inputs_to_lifted_tensor_constants[
                     target
                 ]
-            elif target in self.exported_program.graph_signature.inputs_to_buffers:
-                fqn = self.exported_program.graph_signature.inputs_to_buffers[target]
-            else:
-                fqn = self.exported_program.graph_signature.inputs_to_parameters[target]
+
+            # From the fqn find the corresponding tensor
+            real_tensor = None
             if fqn in self.exported_program.state_dict:
-                spec = TensorSpec.from_tensor(
-                    self.exported_program.state_dict[fqn], const=True
-                )
-                const_tensor = True
+                real_tensor = self.exported_program.state_dict[fqn]
+                is_user_input = False
+
             elif fqn in self.exported_program.constants:
-                spec = TensorSpec.from_tensor(
-                    self.exported_program.constants[fqn], const=True
-                )
-                const_tensor = True
-            else:
+                real_tensor = self.exported_program.constants[fqn]
+                is_user_input = False
+            elif fqn is not None:
                 buffers = self.exported_program.named_buffers()
                 buf = next((x[1] for x in buffers if x[0] == fqn), None)
                 if buf is not None:
-                    spec = TensorSpec.from_tensor(buf, const=True)
-                    const_tensor = True
+                    real_tensor = buf
+                    is_user_input = False
                 else:
                     raise InternalError(
                         self._emit_node_specific_error(
@@ -1344,13 +1364,28 @@ def placeholder(
                         )
                     )
 
+            # assign the storage of the placeholder spec to the storage of the real tensor if there is one
+            if real_tensor is not None:
+                # for non-contiguous tensors, convert to a contiguous one
+                real_tensor = real_tensor.contiguous()
+                # Weights cannot be views during emission or serialization
+                if real_tensor.nbytes != real_tensor.untyped_storage().nbytes():
+                    real_tensor = real_tensor.clone()
+
+                spec.storage = real_tensor.untyped_storage()
+
+            # User inputs and mutable buffers are not constants, other buffers or parameters are.
+            spec.const = not (is_user_input or is_mutable_buffer)
+
         evalue = (
            self._tensor_spec_to_evalue(spec)
            if isinstance(spec, TensorSpec)
            else self._constant_to_evalue(spec, None)
        )
         value = self._emit_evalue(evalue)
-        if not const_tensor:
+
+        # Only user inputs should remain as inputs.
+        if is_user_input:
             self.inputs.append(value.id)
 
         return value
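The view guard in the new storage hookup is worth unpacking: a tensor that is a view shares a larger untyped storage, so serializing its storage directly would write out bytes that do not belong to the weight. A standalone sketch of the condition the emitter checks:

    import torch

    base = torch.arange(8, dtype=torch.float32)
    view = base[:2]  # a view over the first 2 of 8 elements
    print(view.nbytes)                      # 8: bytes owned by the view
    print(view.untyped_storage().nbytes())  # 32: bytes of the whole underlying storage
    # nbytes != untyped_storage().nbytes() flags the view, so the emitter clones it
    # and serializes exactly the bytes that belong to the weight.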

exir/emit/test/TARGETS

Lines changed: 1 addition & 1 deletion
@@ -21,6 +21,6 @@ python_unittest(
         "//executorch/exir/passes:constant_prop_pass",
         "//executorch/exir/tests:lib",
         "//executorch/exir/tests:models",
-        "//executorch/extension/pybindings:portable_lib",  # @manual
+        "//executorch/extension/pybindings:aten_lib",
     ],
 )

exir/emit/test/test_emit.py

Lines changed: 41 additions & 1 deletion
@@ -19,6 +19,7 @@
 from executorch.exir import EdgeCompileConfig, ExecutorchProgramManager, to_edge
 from executorch.exir.backend.backend_api import to_backend
 from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
+from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.emit import emit_program  # noqa
 from executorch.exir.passes.constant_prop_pass import constant_prop_pass
 from executorch.exir.passes.sym_shape_eval_pass import ConstraintBasedSymShapeEvalPass
@@ -42,6 +43,7 @@
 )
 from executorch.exir.tests.common import register_additional_test_aten_ops
 from executorch.exir.tests.models import Mul
+from executorch.extension.pybindings.aten_lib import _load_for_executorch_from_buffer
 from functorch.experimental import control_flow
 from torch import nn
 
@@ -1028,7 +1030,7 @@ def forward(self, k: torch.Tensor) -> torch.Tensor:
         edge = to_edge(captured)
         from executorch.exir.passes import MemoryPlanningPass
 
-        config = exir.ExecutorchBackendConfig(
+        config = exir.ExecutorchBackendConfig(  # pyre-ignore[28]
             sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(),
             memory_planning_pass=MemoryPlanningPass(
                 memory_planning_algo="greedy",
@@ -1393,3 +1395,41 @@ def forward(self, x):
         self.assertEqual(len(exec_plan.inputs), 1)
         self.assertEqual(len(program.constant_buffer), 2)
         self.assertEqual(len(program.constant_buffer[1].storage), 24)
+
+    def test_mutable_buffers(self) -> None:
+        def count_copies(gm: torch.fx.GraphModule) -> int:
+            return sum(
+                (
+                    node.target == torch.ops.aten.copy_
+                    or node.target == exir_ops.edge.aten.copy_.default
+                )
+                for node in gm.graph.nodes
+            )
+
+        class MutableStateModule(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.register_buffer("state", torch.zeros(1))
+
+            def forward(self, x):
+                y = x + self.state
+                self.state.add_(1)
+                return y
+
+        model = to_edge(
+            export(
+                MutableStateModule(),
+                (torch.zeros(1),),
+            )
+        )
+        model = model.to_executorch()
+        model.dump_executorch_program(True)
+        self.assertTrue(
+            model.executorch_program.execution_plan[0]  # pyre-ignore[16]
+            .values[0]
+            .val.allocation_info
+            is not None
+        )
+        executorch_module = _load_for_executorch_from_buffer(model.buffer)
+        self.assertEqual(executorch_module(torch.zeros(1))[0], torch.zeros(1))
+        self.assertEqual(executorch_module(torch.zeros(1))[0], torch.zeros(1) + 1)
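For reference, the two `executorch_module` assertions mirror what `MutableStateModule` does in eager mode: the first call returns 0 and the second, after the in-place increment has persisted, returns 1. (The first assertion also relies on the planned buffer coming up zeroed at runtime, consistent with the "meaningless initial state" assumption in the summary.) A quick eager-mode sketch:

    m = MutableStateModule()
    print(m(torch.zeros(1)))  # tensor([0.]) -- state starts at zero
    print(m(torch.zeros(1)))  # tensor([1.]) -- the mutation persisted across calls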
