Remove fuse_unsqueeze_cat_sum from OSS (#11)

Yinghai Lu · Wei Wei · commit ced1978ebf9a · 2022-06-03T17:54:11.000-07:00
Summary: Pull Request resolved: https://github.com/pytorch/fx2trt/pull/11 This pass is case-specific and should be applied to the whole net rather than to the lowering subnet: in the special case where the number of cat inputs is 1, it can potentially remove all the ops and make the input and output the same tensor, which TRT doesn't like. Reviewed By: wushirong Differential Revision: D34709569 fbshipit-source-id: 70438ba582063aa65b27f74f11d39b27897370b8
diff --git a/fx/lower.py b/fx/lower.py
@@ -23,7 +23,6 @@
 from .passes.fuse_pass import (
     fuse_permute_linear,
     fuse_permute_matmul,
-    fuse_unsqueeze_cat_sum,
 )
 from .passes.remove_duplicate_output_args import (
     remove_duplicate_output_args,
@@ -252,7 +251,6 @@ def __call__(self, mod, input, split_name) -> TRTInterpreterResult:
         if self.lower_setting.enable_fuse:
             mod = fuse_permute_matmul(mod)
             mod = fuse_permute_linear(mod)
-            mod = fuse_unsqueeze_cat_sum(mod)
             FUSE_PASSES_POST_OBSERVER.observe(mod, input)
 
         # Prepare algorithm selector and timing_cache for TRTInterpreter
diff --git a/fx/passes/fuse_pass.py b/fx/passes/fuse_pass.py
@@ -151,40 +151,6 @@ def fuse_permute_matmul(gm: torch.fx.GraphModule):
     return gm
 
 
-@observable()
-def fuse_unsqueeze_cat_sum(gm: torch.fx.GraphModule):
-    for node in gm.graph.nodes:
-        if node.target != acc_ops.sum:
-            continue
-        prev_node = node.kwargs["input"]
-        if prev_node.target != acc_ops.cat or prev_node.kwargs["dim"] != 0:
-            continue
-        cat_inputs = prev_node.kwargs["tensors"]
-        valid_pass = True
-        for i in cat_inputs:
-            if i.target != acc_ops.unsqueeze or i.kwargs["dim"] != 0:
-                valid_pass = False
-                break
-
-        if not valid_pass:
-            continue
-        input_val = [i.kwargs["input"] for i in cat_inputs]
-
-        with gm.graph.inserting_before(node):
-            left = input_val[0]
-            for i in range(1, len(input_val)):
-                right = input_val[i]
-                fused_node = gm.graph.call_function(acc_ops.add, kwargs={"input": left, "other": right})
-                left = fused_node
-        node.replace_all_uses_with(fused_node)
-
-    gm.graph.eliminate_dead_code()
-    gm.graph.lint()
-    gm.recompile()
-    return gm
-
-
-
 try:
     # @manual=//deeplearning/trt/python:py_tensorrt
     import tensorrt as trt
diff --git a/test/passes/test_fuse_unsqueeze_cat_sum_trt.py b/test/passes/test_fuse_unsqueeze_cat_sum_trt.py