Commit cbda8be

Revert "Propagate buffer and parameter indices through AOT (pytorch#130393)"
This reverts commit 69a7738. Reverted pytorch#130393 on behalf of https://github.com/clee2000 because it broke lint for torch/_functorch/_aot_autograd/subclass_utils.py (https://github.com/pytorch/pytorch/actions/runs/9948630877/job/27483551649, https://hud.pytorch.org/pytorch/pytorch/commit/80236dca90b0874cb2b6f9c9fa5f159c55726401). Lint was green on the PR, probably a land race (see the comment on pytorch#130393).
1 parent 9cb23ba commit cbda8be

8 files changed: +15 additions, -91 deletions

test/dynamo/test_subclasses.py

Lines changed: 0 additions & 39 deletions
@@ -1526,45 +1526,6 @@ def f(x):
         out_test = compiled_f(view)
         self.assertEqual(out_ref, out_test)
 
-    @torch._dynamo.config.patch("inline_inbuilt_nn_modules", True)
-    def test_mark_static_with_subclass_desugaring(self):
-        from typing import Any, Callable, Dict, List, Optional
-
-        from torch._dynamo.decorators import mark_static_address
-        from torch._inductor.compile_fx import compile_fx
-        from torch._inductor.cudagraph_utils import BoxedDeviceIndex
-        from torch._inductor.utils import BoxedBool
-
-        x_inner = torch.ones(4)
-        x = TwoTensor(x_inner, x_inner)
-        mark_static_address(x, guard=False)
-
-        def inner_compile(
-            gm: torch.fx.GraphModule,
-            example_inputs: List[torch.Tensor],
-            cudagraphs: Optional[BoxedBool] = None,
-            static_input_idxs: Optional[List[int]] = None,
-            is_backward: bool = False,
-            graph_id: Optional[int] = None,
-            cpp_wrapper: bool = False,
-            aot_mode: bool = False,
-            is_inference: bool = False,
-            boxed_forward_device_index: Optional[BoxedDeviceIndex] = None,
-            user_visible_outputs: Optional[Dict[str, None]] = None,
-            layout_opt: Optional[bool] = None,
-            extern_node_serializer: Optional[Callable[[List[Any]], Any]] = None,
-        ):
-            self.assertEqual(static_input_idxs, [1, 2])
-            return gm
-
-        compiler = functools.partial(compile_fx, inner_compile=inner_compile)
-
-        @torch.compile(backend=compiler)
-        def fn(t0, t1, t2):
-            return t0 + t1 + t2 + 2
-
-        fn(torch.ones(4), x, torch.ones(4))
-
 
 instantiate_parametrized_tests(SubclassTests)

torch/_functorch/_aot_autograd/collect_metadata_analysis.py

Lines changed: 10 additions & 11 deletions
@@ -11,7 +11,7 @@
 import collections
 import logging
 from functools import wraps
-from typing import Callable, DefaultDict, Dict, List, Optional
+from typing import Callable, DefaultDict, Dict, List
 
 import torch
 import torch.utils._pytree as pytree
@@ -25,7 +25,6 @@
     is_traceable_wrapper_subclass,
     transform_subclass,
 )
-
 from .functional_utils import (
     are_all_mutations_hidden_from_autograd,
     are_all_mutations_under_no_grad_or_inference_mode,
@@ -125,8 +124,6 @@ def run_functionalized_fw_and_collect_metadata(
     keep_input_mutations: bool,
     # TODO: refactor to kill this flag
     is_train: bool = False,
-    # Note: this is guaranteed to be set when running under dynamo
-    static_input_indices: Optional[List[int]] = None,
     pre_dispatch: bool = False,
 ) -> Callable[..., ViewAndMutationMeta]:
     memo: Dict[Tensor, Tensor] = {}
@@ -669,15 +666,17 @@ def view_avoid_dupes_with_primals(t):
         )
         user_outs = pytree.tree_map(from_fun, f_output_tangents)
 
-        nonlocal static_input_indices
-        static_input_indices = static_input_indices or []
-        if torch._dynamo.compiled_autograd.in_compiled_autograd_region:
-            passed_indices = set(static_input_indices)
-            static_input_indices = [
+        if (
+            torch._dynamo.config.inline_inbuilt_nn_modules
+            or torch._dynamo.compiled_autograd.in_compiled_autograd_region
+        ):
+            static_parameter_input_indices = [
                 i
                 for i, arg in enumerate(flat_args)
-                if (isinstance(arg, torch.nn.Parameter) or i in passed_indices)
+                if isinstance(arg, torch.nn.Parameter)
             ]
+        else:
+            static_parameter_input_indices = []
 
         f_mutated_inputs = [
             inp
@@ -730,7 +729,7 @@ def view_avoid_dupes_with_primals(t):
             subclass_tangent_meta=create_subclass_meta(traced_tangents),
             is_train=is_train,
             grad_enabled_mutation=grad_enabled_mutation,
-            static_input_indices=static_input_indices,
+            static_parameter_indices=static_parameter_input_indices,
             tokens=mode._tokens,
         )
         return metadata
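
For context, the hunk above restores deriving static indices purely from whether each flat argument is an nn.Parameter, instead of consuming indices propagated from Dynamo. A minimal, self-contained sketch of that restored selection (the standalone function and its arguments are illustrative, not the PyTorch source):

# Illustrative sketch only: mirrors the restored list comprehension above.
# `flat_args` stands in for AOTAutograd's flattened inputs.
from typing import List

import torch


def restored_static_parameter_indices(
    flat_args: List[torch.Tensor],
    inline_inbuilt_nn_modules: bool,
    in_compiled_autograd_region: bool,
) -> List[int]:
    if inline_inbuilt_nn_modules or in_compiled_autograd_region:
        return [
            i
            for i, arg in enumerate(flat_args)
            if isinstance(arg, torch.nn.Parameter)
        ]
    return []


# Example: only the nn.Parameter at position 1 is reported as static.
args = [torch.ones(2), torch.nn.Parameter(torch.ones(2)), torch.ones(2)]
assert restored_static_parameter_indices(args, True, False) == [1]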

torch/_functorch/_aot_autograd/runtime_wrappers.py

Lines changed: 0 additions & 2 deletions
@@ -905,7 +905,6 @@ def wrapped_flat_fn(*args):
         if config.debug_assert:
             ref_fw_metadata = run_functionalized_fw_and_collect_metadata(
                 wrapped_flat_fn,
-                static_input_indices=aot_config.static_input_indices,
                 keep_input_mutations=fw_metadata.keep_input_mutations,
                 is_train=fw_metadata.is_train,
             )(*deduped_flat_args)
@@ -1095,7 +1094,6 @@ def wrapped_flat_fn(*args):
         if config.debug_assert:
             ref_fw_metadata = run_functionalized_fw_and_collect_metadata(
                 wrapped_flat_fn,
-                static_input_indices=aot_config.static_input_indices,
                 keep_input_mutations=fw_metadata.keep_input_mutations,
                 is_train=fw_metadata.is_train,
             )(*flat_args_with_synthetic_bases)

torch/_functorch/_aot_autograd/schemas.py

Lines changed: 1 addition & 2 deletions
@@ -329,7 +329,7 @@ class ViewAndMutationMeta:
     deterministic: Optional[bool] = None
 
     # Keeps track of which input indices store parameters (which we will treat as static)
-    static_input_indices: List[int] = field(default_factory=list)
+    static_parameter_indices: List[int] = field(default_factory=list)
 
     # Map of effect type (ex. _EffectType.ORDERED) to token. If there are
     # side-effectful operators, FunctionalTensorMode will populate this
@@ -803,7 +803,6 @@ class AOTConfig:
     no_tangents: bool = False
     dynamic_shapes: bool = False
    aot_autograd_arg_pos_to_source: Optional[List[Source]] = None
-    static_input_indices: Optional[List[int]] = None
     inference_compiler: Optional[Callable] = None
     enable_log: bool = True
     # this is always false outside of export.
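
For reference, a minimal sketch of what the reverted field looks like on the metadata dataclass (a simplified excerpt under an assumed class name, not the full ViewAndMutationMeta definition):

# Simplified excerpt for illustration; the real dataclass carries many more fields.
from dataclasses import dataclass, field
from typing import List


@dataclass
class ViewAndMutationMetaSketch:
    # Keeps track of which input indices store parameters (treated as static).
    static_parameter_indices: List[int] = field(default_factory=list)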

torch/_functorch/_aot_autograd/subclass_utils.py

Lines changed: 0 additions & 18 deletions
@@ -136,24 +136,6 @@ def concat_inner_tensors_from_subclasses(xs):
     return unwrapped_args
 
 
-def remap_unwrapped_subclass_arg_indices(wrapped_args, static_input_indices):
-    static_input_indices = set(static_input_indices)
-    new_ind = 0
-    remapped_static_indices = []
-    for i, arg in enumerate(wrapped_args):
-        num_indices = 1
-        if is_traceable_wrapper_subclass(arg):
-            num_indices = len(get_plain_tensors(arg))
-
-        for _ in range(num_indices):
-            if i in static_input_indices:
-                remapped_static_indices.append(new_ind)
-
-            new_ind += 1
-
-    return remapped_static_indices
-
-
 # Turns a flattened list of tensor arguments into (maybe) subclass tensors.
 # This function is used both at trace time and runtime, so we have an is_runtime flag telling us which context we're in.
 def wrap_tensor_subclasses(
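
The deleted helper expands each wrapper-tensor argument into its inner plain tensors and carries wrapper-level static indices onto the flattened positions. A self-contained sketch of that remapping, decoupled from tensor subclasses by taking precomputed inner-tensor counts instead of calling get_plain_tensors (the sketch's names are illustrative):

# Sketch of the removed remapping: each entry of `num_inner_tensors` is how
# many plain tensors the i-th wrapper argument flattens into (1 for ordinary
# tensors); wrapper-level static indices map onto flattened positions.
from typing import List, Sequence


def remap_static_indices_sketch(
    num_inner_tensors: Sequence[int], static_input_indices: List[int]
) -> List[int]:
    static = set(static_input_indices)
    remapped, new_ind = [], 0
    for i, n in enumerate(num_inner_tensors):
        for _ in range(n):
            if i in static:
                remapped.append(new_ind)
            new_ind += 1
    return remapped


# Matches the deleted test above: a TwoTensor at position 1 (two inner
# tensors) marked static maps to flattened positions [1, 2].
assert remap_static_indices_sketch([1, 2, 1], [1]) == [1, 2]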

torch/_functorch/_aot_autograd/traced_function_transforms.py

Lines changed: 0 additions & 5 deletions
@@ -53,7 +53,6 @@
 )
 from .subclass_utils import (
     create_subclass_meta,
-    remap_unwrapped_subclass_arg_indices,
     requires_subclass_dispatch,
     unwrap_tensor_subclasses,
     wrap_tensor_subclasses_maybe_joint,
@@ -703,9 +702,6 @@ def metadata_fn(*primals):
         args_unwrapped = unwrap_tensor_subclasses(
             args, is_joint_structure=is_joint_structure
         )
-        remapped_static_indices = remap_unwrapped_subclass_arg_indices(
-            args, meta.static_input_indices
-        )
 
         if is_joint_structure:
             primals_unwrapped = args_unwrapped[0]
@@ -733,7 +729,6 @@ def metadata_fn(*primals):
         # See Note: [Partitioner handling for Subclasses, Part 2] for more info.
         meta_updated = run_functionalized_fw_and_collect_metadata(
             metadata_fn,
-            static_input_indices=remapped_static_indices,
             keep_input_mutations=meta.keep_input_mutations,
             is_train=meta.is_train,
         )(*primals_unwrapped)

torch/_functorch/aot_autograd.py

Lines changed: 2 additions & 12 deletions
@@ -20,7 +20,6 @@
 from torch.fx.experimental.proxy_tensor import make_fx
 from torch.fx.experimental.symbolic_shapes import ShapeEnv
 from torch.utils._python_dispatch import is_traceable_wrapper_subclass
-
 from . import config
 from ._aot_autograd.autograd_cache import (  # noqa: F401
     AOTAutogradCache,
@@ -589,7 +588,6 @@ def _dup_fake_script_obj(fake_flat_args):
     with ctx:
         fw_metadata = run_functionalized_fw_and_collect_metadata(
             flat_fn,
-            static_input_indices=aot_config.static_input_indices,
             keep_input_mutations=aot_config.keep_inference_input_mutations,
             is_train=needs_autograd,
             pre_dispatch=aot_config.pre_dispatch,
@@ -625,7 +623,6 @@ def _dup_fake_script_obj(fake_flat_args):
                 keep_input_mutations=aot_config.keep_inference_input_mutations,
                 is_train=False,
                 pre_dispatch=aot_config.pre_dispatch,
-                static_input_indices=aot_config.static_input_indices,
             )(*fake_flat_args)
         else:
             fw_metadata = ViewAndMutationMeta(
@@ -639,7 +636,7 @@ def _dup_fake_script_obj(fake_flat_args):
                subclass_tangent_meta=fw_metadata.subclass_tangent_meta,
                 is_train=False,
                 tokens=fw_metadata.tokens,
-                static_input_indices=fw_metadata.static_input_indices,
+                static_parameter_indices=fw_metadata.static_parameter_indices,
             )
 
     if fw_metadata.num_intermediate_bases > 0:
@@ -944,10 +941,9 @@ def aot_module_simplified(
     # Next, the input args
     full_args.extend(args)
 
-    static_input_indices = []
     if hasattr(mod, "graph"):
         # Non dynamo entrypoints can get to here...
-        for pos, node in enumerate(mod.graph.find_nodes(op="placeholder")):
+        for node in mod.graph.find_nodes(op="placeholder"):
             if hasattr(node, "_dynamo_source"):
                 # ... but not here!
                 if aot_autograd_arg_pos_to_source is None:
@@ -957,11 +953,6 @@ def aot_module_simplified(
                 seen_sources.add(source)
                 aot_autograd_arg_pos_to_source.append(source)
 
-                if "tensor_dict" in node.meta and node.meta["tensor_dict"].get(
-                    "_dynamo_static_input_type", None
-                ):
-                    static_input_indices.append(pos)
-
     if aot_autograd_arg_pos_to_source is not None:
         assert len(full_args) == len(aot_autograd_arg_pos_to_source)
 
@@ -982,7 +973,6 @@ def aot_module_simplified(
        keep_inference_input_mutations=keep_inference_input_mutations,
        dynamic_shapes=dynamic_shapes,
        aot_autograd_arg_pos_to_source=aot_autograd_arg_pos_to_source,
-        static_input_indices=static_input_indices,
        is_export=False,
        no_tangents=False,
        cache_key=None,
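
The lines removed from aot_module_simplified are the ones that collected static indices from Dynamo's placeholder metadata. A hedged sketch of that scan, operating on plain meta dicts so it stays self-contained (the helper name and the example marker value are illustrative assumptions, not the PyTorch API):

# Illustrative sketch: collect positions whose placeholder metadata carries
# Dynamo's static-input marker, as the removed loop did via node.meta.
from typing import Any, Dict, List


def collect_static_input_indices(placeholder_metas: List[Dict[str, Any]]) -> List[int]:
    static_input_indices = []
    for pos, meta in enumerate(placeholder_metas):
        tensor_dict = meta.get("tensor_dict", {})
        if tensor_dict.get("_dynamo_static_input_type", None):
            static_input_indices.append(pos)
    return static_input_indices


# Example: only the second placeholder carries the marker (value is illustrative).
metas = [{}, {"tensor_dict": {"_dynamo_static_input_type": "unguarded"}}, {"tensor_dict": {}}]
assert collect_static_input_indices(metas) == [1]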

torch/_inductor/compile_fx.py

Lines changed: 2 additions & 2 deletions
@@ -136,7 +136,7 @@ def get_static_input_idxs(num_fixed):
     if not context or not context.fw_metadata:
         return fixed
 
-    return fixed + context.fw_metadata.static_input_indices
+    return fixed + context.fw_metadata.static_parameter_indices
 
 
 @functools.lru_cache(None)
@@ -1246,7 +1246,7 @@ def fw_compiler_freezing(
            params_flat[i] = None
 
     if tracing_context.fw_metadata:
-        static_input_idxs += tracing_context.fw_metadata.static_input_indices
+        static_input_idxs += tracing_context.fw_metadata.static_parameter_indices
 
     with mock.patch.object(fake_mode, "allow_non_fake_inputs", True):
         optimized_function = inner_compile(
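
After the revert, Inductor composes its static-input list from the leading fixed inputs plus the parameter indices recorded in the forward metadata, as the first hunk above shows. A tiny sketch of that composition (standalone, with the tracing context replaced by an explicit argument; names are assumptions for illustration):

# Sketch of the post-revert composition in get_static_input_idxs: the first
# `num_fixed` inputs are always static; parameter indices from the AOT
# metadata (if any) are appended after them.
from typing import List, Optional


def static_input_idxs_sketch(
    num_fixed: int, static_parameter_indices: Optional[List[int]]
) -> List[int]:
    fixed = list(range(num_fixed))
    if static_parameter_indices is None:
        return fixed
    return fixed + static_parameter_indices


assert static_input_idxs_sketch(2, [3, 5]) == [0, 1, 3, 5]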
