
Commit 91c5910

ydwu4 authored and facebook-github-bot committed
Replace node.meta source_fn with source_fn_stack (#108595)
Summary:
X-link: pytorch/executorch#210

A resubmit of #108447. Copying over the description: this is a follow-up of the discussion in #108356, where we want to replace source_fn with source_fn_stack.

Test Plan: see the added tests in test_higher_order_ops.py and the modified existing tests.

Differential Revision: D48984986

Pulled By: ydwu4
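For orientation, here is a minimal sketch (not part of the commit; the targets are illustrative stand-in strings) of the metadata shape before and after this change. Previously a node carried a single (name, target) pair under source_fn; it now carries a list under source_fn_stack, with one entry per enclosing higher-order op and the node's own entry last, so consumers that only need the innermost name read the last element.

# Illustrative sketch only; targets are stand-in strings rather than real ops/classes.
# Before: a single (name, target) pair describing just the node itself.
node_meta_before = {"source_fn": ("linear", "Linear")}

# After: a stack, outermost higher-order op first and the node's own entry last,
# e.g. a linear call traced inside two nested `wrap` higher-order ops.
node_meta_after = {
    "source_fn_stack": [("wrap", "wrap"), ("wrap", "wrap"), ("linear", "Linear")],
}

# Consumers that only need the innermost name (as test_aot_autograd.py does below)
# read the last entry:
stack = node_meta_after["source_fn_stack"]
mod_name = stack[-1][0] if stack else ""
assert mod_name == "linear"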
1 parent c75aec9 commit 91c5910

File tree

13 files changed (+218, -40 lines)


test/dynamo/test_aot_autograd.py

Lines changed: 4 additions & 3 deletions
@@ -798,10 +798,11 @@ def _prepare_model_args():
                 continue
             if min_seq_nr < 0:
                 min_seq_nr = seq_nr
-            mod_name = node.meta.get("source_fn", "")
+            source_fn_stack = node.meta.get("source_fn_stack", [])
             orig_aten = node.meta.get("original_aten", "")
-            if isinstance(mod_name, tuple):
-                mod_name = mod_name[0]
+            mod_name = ""
+            if len(source_fn_stack) > 0:
+                mod_name = source_fn_stack[-1][0]
             # Make all seq_nr relative so it starts at 0
             seq_nr = seq_nr - min_seq_nr
             seq_table = seq_table + f"{seq_nr}|{orig_aten}|{mod_name}\n"

test/dynamo/test_export.py

Lines changed: 2 additions & 2 deletions
@@ -957,7 +957,7 @@ def forward(self, x):
             if node.op not in {"placeholder", "output"}:
                 self.assertTrue(node.stack_trace is not None)
                 self.assertTrue(node.meta["nn_module_stack"] is not None)
-                self.assertTrue(node.meta["source_fn"] is not None)
+                self.assertTrue(node.meta["source_fn_stack"] is not None)

         torch._dynamo.reset()

@@ -967,7 +967,7 @@ def forward(self, x):
             if node.op == "call_function":
                 self.assertTrue(node.stack_trace is not None)
                 self.assertTrue(node.meta["nn_module_stack"] is not None)
-                self.assertTrue(node.meta["source_fn"] is not None)
+                self.assertTrue(node.meta["source_fn_stack"] is not None)
                 self.assertTrue(node.meta["val"] is not None)
                 self.assertTrue(node.meta["original_aten"] is not None)

test/dynamo/test_higher_order_ops.py

Lines changed: 143 additions & 0 deletions
@@ -1605,6 +1605,149 @@ def fn(x):

         self.assertTrue(activations.keys() == forward_handles.keys())

+    def _check_source_fn_stack(self, gm, exp_stack_dict):
+        for mod in gm.modules():
+            for node in mod.graph.nodes:
+                print(node.name)
+                if node.name in exp_stack_dict:
+                    exp_stack = exp_stack_dict[node.name]
+                    actual_stack = [
+                        name for name, _ in node.meta.get("source_fn_stack", [])
+                    ]
+                    print(f"{exp_stack}, {actual_stack}")
+                    self.assertEqual(actual_stack, exp_stack)
+
+    def test_wrap_source_fn_stack(self):
+        class MockModule(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.linear = torch.nn.Linear(4, 4)
+
+            def forward(self, x):
+                return self.linear(x)
+
+        mod = MockModule()
+
+        def gn(x):
+            return torch.cos(x) + wrap(mod, x)
+
+        def fn(x):
+            return wrap(gn, x)
+
+        backend = EagerAndRecordGraphs()
+        inp = torch.randn((4, 4))
+        torch.compile(fn, backend=backend, fullgraph=True)(inp)
+
+        gm = backend.graphs[0]
+        exp_stack = {
+            "cos": ["wrap", "cos"],
+            "add": ["wrap", "add"],
+            "linear": ["wrap", "wrap", "linear"],
+        }
+        self._check_source_fn_stack(gm, exp_stack)
+
+    def test_cond_source_fn_stack(self):
+        backend = EagerAndRecordGraphs()
+
+        @torch.compile(backend=backend, fullgraph=True)
+        def cond_f(pred, pred2, x, y):
+            def true_fn(pred2, x, y):
+                return x + y
+
+            def false_fn(pred2, x, y):
+                def true_fn2(x, y):
+                    return x.sin() - y.cos()
+
+                def false_fn2(x, y):
+                    return x.cos() - y.sin()
+
+                return control_flow.cond(pred2, true_fn2, false_fn2, [x, y])
+
+            return control_flow.cond(pred, true_fn, false_fn, [pred2, x, y])
+
+        pred = torch.tensor(True)
+        pred2 = torch.tensor(False)
+        xs = torch.randn(2, 3, 3)
+        y = torch.randn(3, 3)
+        cond_f(pred, pred2, xs, y)
+
+        gm = backend.graphs[0]
+        exp_stack = {
+            "add": ["cond", "add"],
+            "cos": ["cond", "cond", "cos"],
+            "sin": ["cond", "cond", "sin"],
+            "sub": ["cond", "cond", "sub"],
+        }
+        self._check_source_fn_stack(gm, exp_stack)
+
+    def test_map_source_fn_stack(self):
+        backend = EagerAndRecordGraphs()
+
+        xs = torch.randn(2, 3, 3)
+        y = torch.randn(3)
+
+        @torch.compile(backend=backend, fullgraph=True)
+        def map_f(xs, y):
+            def inner(x, y):
+                def inner2(x, y):
+                    return x + y
+
+                return control_flow.map(inner2, x, y) * y.cos()
+
+            return control_flow.map(inner, xs, y).sin()
+
+        result = map_f(xs, y)
+
+        gm = backend.graphs[0]
+        exp_stack = {
+            "sin": ["sin"],
+            "cos": ["map", "cos"],
+            "mul": ["map", "mul"],
+            "add": ["map", "map", "add"],
+        }
+        self._check_source_fn_stack(gm, exp_stack)
+
+    def test_grad_source_fn_stack(self):
+        backend = EagerAndRecordGraphs()
+
+        def fn(x):
+            return x.sin().sum()
+
+        @torch.compile(backend=backend, fullgraph=False)
+        def wrapper_fn(x):
+            return torch.func.grad(torch.func.grad(fn))(x)
+
+        x = torch.randn(())
+
+        wrapper_fn(x)
+        gm = backend.graphs[0]
+        exp_stack = {
+            "sin": ["grad_impl", "grad_impl", "sin"],
+            "sum": ["grad_impl", "grad_impl", "sum"],
+        }
+        self._check_source_fn_stack(gm, exp_stack)
+
+    def test_vmap_source_fn_stack(self):
+        backend = EagerAndRecordGraphs()
+
+        def inner_fn(x):
+            return torch.func.vmap(lambda x: x.sum(0) + x.sum(1))(x)
+
+        @torch.compile(backend=backend, fullgraph=True)
+        def fn(x):
+            return torch.func.vmap(lambda x: inner_fn(x.cos()))(x)
+
+        x = torch.randn(3, 3, 3, 3)
+        fn(x)
+        gm = backend.graphs[0]
+        exp_stack = {
+            "cos": ["vmap_impl", "cos"],
+            "sum_1": ["vmap_impl", "vmap_impl", "sum_1"],
+            "sum_2": ["vmap_impl", "vmap_impl", "sum_2"],
+            "add": ["vmap_impl", "vmap_impl", "add"],
+        }
+        self._check_source_fn_stack(gm, exp_stack)
+

 class FuncTorchHigherOrderOpTests(torch._dynamo.test_case.TestCase):
     def run(self, result=None):
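Outside the test suite, the new metadata can be inspected with a small graph-recording backend. The following is a hedged sketch, not part of this commit: recording_backend is an illustrative helper, and it assumes torch.func.vmap traces under torch.compile the same way the tests above exercise it.

# Minimal sketch: record the Dynamo-captured graph and print the
# source_fn_stack names of each call_function node.
import torch

graphs = []

def recording_backend(gm, example_inputs):
    graphs.append(gm)   # keep the captured torch.fx.GraphModule
    return gm.forward   # run the captured graph as-is

@torch.compile(backend=recording_backend, fullgraph=True)
def fn(x):
    # vmap is a higher-order op, so ops traced inside its body get a deeper stack
    return torch.func.vmap(lambda t: t.sin() + 1.0)(x)

fn(torch.randn(4, 4))

for mod in graphs[0].modules():
    if not isinstance(mod, torch.fx.GraphModule):
        continue
    for node in mod.graph.nodes:
        if node.op == "call_function":
            names = [name for name, _ in node.meta.get("source_fn_stack", [])]
            print(node.name, names)   # e.g. sin ['vmap_impl', 'sin']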

test/export/test_export.py

Lines changed: 8 additions & 3 deletions
@@ -346,7 +346,7 @@ def forward(self, x):
                 node.name in ep.graph_signature.inputs_to_buffers or
                 node.name in ep.graph_signature.inputs_to_parameters
             ):
-                self.assertTrue("source_fn" in node.meta)
+                self.assertTrue("source_fn_stack" in node.meta)
                 self.assertTrue("nn_module_stack" in node.meta)


@@ -1071,8 +1071,13 @@ def forward(self, x):
         for mod in gm.modules():
             for node in mod.graph.nodes:
                 if node.name in {"sin", "cos"}:
-                    actual_source_fns.append(node.meta.get("source_fn", None))
-        exp_source_fns = [("cos", "cos"), ("sin", "sin")]
+                    source_fn_st = node.meta.get("source_fn_stack", None)
+                    if source_fn_st is not None:
+                        source_names = []
+                        for source_fn in source_fn_st:
+                            source_names.append(source_fn[0])
+                        actual_source_fns.append(source_names)
+        exp_source_fns = [["cond", "cos"], ["cond", "sin"]]
         self.assertEqual(actual_source_fns, exp_source_fns)

     def test_lift_constants(self) -> None:

test/export/test_serialize.py

Lines changed: 3 additions & 3 deletions
@@ -273,10 +273,10 @@ def _check_graph_nodes(gm1, gm2, _check_meta=True):
                 #     node1.meta.get("nn_module_stack", None),
                 #     node2.meta.get("nn_module_stack", None),
                 # )
-                # Check "source_fn" metadata
+                # Check "source_fn_stack" metadata
                 self.assertEqual(
-                    node1.meta.get("source_fn", None),
-                    node2.meta.get("source_fn", None),
+                    node1.meta.get("source_fn_stack", None),
+                    node2.meta.get("source_fn_stack", None),
                 )

         _check_graph_nodes(ep.graph_module, deserialized_ep.graph_module, _check_meta)

test/functorch/test_control_flow.py

Lines changed: 2 additions & 2 deletions
@@ -1450,7 +1450,7 @@ def false_fn(x):
             return x * x.sin()

         def foo(x):
-            return cond(x.shape[0] == 4, true_fn, false_fn, [x])
+            return cond(x.shape[0] == 4, true_fn, false_fn, (x,))
         inp = torch.randn([4, 3])
         gm, _ = torch._dynamo.export(foo)(inp)

@@ -1461,7 +1461,7 @@ def run_with_interpreter(*args):


         checked_ops = {"add", "mul", "sin", "cos"}
-        checked_meta = ["source_fn", "stack_trace"]
+        checked_meta = ["source_fn_stack", "stack_trace"]
         all_source_fns = collect_meta_for_filtered_nodes(gm, checked_ops, checked_meta)
         new_source_fns = collect_meta_for_filtered_nodes(new_gm, checked_ops, checked_meta)
         self.assertEqual(all_source_fns, new_source_fns)

test/test_fx.py

Lines changed: 2 additions & 2 deletions
@@ -1757,13 +1757,13 @@ def forward(self, x):
             if node.op == 'get_attr':
                 node.meta["nn_module_stack"] = "self"
                 node.meta["stack_trace"] = "stack_trace"
-                node.meta["source_fn"] = "source_fn"
+                node.meta["source_fn_stack"] = "source_fn_stack"
         new_gm = Transformer(gm).transform()
         for node in new_gm.graph.nodes:
             if node.op == 'get_attr':
                 self.assertEqual(node.meta["nn_module_stack"], "self")
                 self.assertEqual(node.meta["stack_trace"], "stack_trace")
-                self.assertEqual(node.meta["source_fn"], "source_fn")
+                self.assertEqual(node.meta["source_fn_stack"], "source_fn_stack")


     def test_interpreter(self):

torch/_dynamo/output_graph.py

Lines changed: 27 additions & 8 deletions
@@ -409,11 +409,13 @@ def remove_node(self, *args, **kwargs):
         return self.current_tracer.remove_node(*args, **kwargs)

     @contextlib.contextmanager
-    def new_subtracer(self):
+    def new_subtracer(self, source_target):
         new_scope_ctx = enter_new_scope()
         try:
             new_scope_ctx.__enter__()
-            tracer = SubgraphTracer(self, parent=self.current_tracer)
+            tracer = SubgraphTracer(
+                self, parent=self.current_tracer, source_target=source_target
+            )
             self.tracers.append(tracer)
             yield tracer
         finally:

@@ -1185,7 +1187,9 @@ class SubgraphTracer(fx.Tracer):
     compiling and executing the graph.
     """

-    def __init__(self, output_graph, parent=None, export_root=False):
+    def __init__(
+        self, output_graph, parent=None, export_root=False, source_target=None
+    ):
         super().__init__()
         self.output_graph = weakref.proxy(output_graph)
         self.graph = torch.fx.Graph()

@@ -1220,6 +1224,17 @@ def __init__(self, output_graph, parent=None, export_root=False):
         self.lifted_freevars = collections.OrderedDict()
         self.prev_inst = None

+        # Each SubgraphTracer is associated with a source target, which indicates
+        # which operator this subgraph is attached to. We compute a source_fn_stack
+        # based on the source target. For the root tracer, it's set to [].
+        # This is useful for debugging and transforming the exported graph.
+        if self.parent is None:
+            self.source_fn_stack = []
+        else:
+            self.source_fn_stack = self.parent.source_fn_stack + [
+                (self.graph._target_to_str(source_target), source_target)
+            ]
+
     def create_proxy(
         self,
         kind,

@@ -1302,15 +1317,19 @@ def get_trace_call_log_str():
             rv.node.meta["nn_module_stack"] = nn_module_stack.copy()

         if kind in {"call_function", "call_method"}:
-            rv.node.meta["source_fn"] = (rv.node.name, target)
+            rv.node.meta["source_fn_stack"] = self.source_fn_stack + [
+                (rv.node.name, target)
+            ]
         elif kind == "call_module":
             if self.parent is not None:
                 unimplemented("Invoking an nn.Module inside HigherOrderOperator")
             # For modules we store the class
-            rv.node.meta["source_fn"] = (
-                rv.node.name,
-                rv.node.meta["nn_module_stack"][target][1],
-            )
+            rv.node.meta["source_fn_stack"] = self.source_fn_stack + [
+                (
+                    rv.node.name,
+                    rv.node.meta["nn_module_stack"][target][1],
+                )
+            ]

         frame_summaries: List[traceback.FrameSummary] = []
         while tx:
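The rule implemented above is plain list concatenation: each nested SubgraphTracer extends its parent's source_fn_stack with the higher-order op it is tracing, and create_proxy appends the node's own (name, target) entry. A tiny illustrative sketch follows, using stand-in string targets instead of the real op objects.

# Sketch of how the stack deepens with nesting; matches the expectation
# ["wrap", "wrap", "linear"] in test_wrap_source_fn_stack above.
root_stack = []                                 # root tracer: source_fn_stack == []
outer_wrap = root_stack + [("wrap", "wrap")]    # tracer for the outer wrap body
inner_wrap = outer_wrap + [("wrap", "wrap")]    # tracer for the nested wrap body

# A "linear" call_module node created while tracing the inner body ends up with:
source_fn_stack = inner_wrap + [("linear", "Linear")]
assert [name for name, _ in source_fn_stack] == ["wrap", "wrap", "linear"]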

torch/_dynamo/variables/higher_order_ops.py

Lines changed: 8 additions & 1 deletion
@@ -118,6 +118,7 @@ def speculate_subgraph(
     graph_checkpoint,
     checkpoint,
     description,
+    source_target,
     *,
     always_restore=False,
     enable_grad=False,

@@ -141,7 +142,7 @@ def speculate_subgraph(
     )

     try:
-        with tx.output.new_subtracer() as tracer:
+        with tx.output.new_subtracer(source_target) as tracer:
             args = validate_args_and_maybe_create_graph_inputs(
                 sub_args, tracer, tx, manually_set_subgraph_inputs
             )

@@ -397,6 +398,7 @@ def speculate_branch(branch):
                 graph_checkpoint,
                 checkpoint,
                 "cond",
+                self.value,
             )
         # Reraise because we want to suggest workarounds
         except Unsupported as e:

@@ -546,6 +548,7 @@ def call_function(
             tx.output.graph,
             checkpoint,
             "torch.ops.higher_order.map",
+            self.value,
         )

         body_nn_modules = tx.copy_graphstate().output.nn_modules

@@ -683,6 +686,7 @@ def call_function(
             graph_checkpoint,
             checkpoint,
             "torch.func.grad",
+            self.value,
             # See NOTE [HACK: Enable autograd while tracing function]
             enable_grad=True,
         )

@@ -872,6 +876,7 @@ def call_function(
             graph_checkpoint,
             checkpoint,
             "torch.vmap",
+            self.value,
         )

         body_name = add_subgraph(

@@ -982,6 +987,7 @@ def call_function(
             graph_checkpoint,
             checkpoint,
             "the user-defined autograd.Function",
+            self.value,
             # Backwards should never, ever be stored!
             always_restore=always_restore,
             restore_side_effects=False,

@@ -1039,6 +1045,7 @@ def create_wrapped_node(self, tx, args, kwargs, description):
             graph_checkpoint,
             checkpoint,
             description,
+            self.value,
             manually_set_subgraph_inputs=False,
         )
