Skip to content

Commit f519935

Browse files
committed
feat(collections): Enable grouped inputs via partial compilation
HACK: This PR enables grouped input features by leveraging partial compilation and disabling the tuple and list evaluators in the case where grouped inputs are used. The intention is that this workaround (WAR) is removed in the next release. Signed-off-by: Naren Dasan <[email protected]> Signed-off-by: Naren Dasan <[email protected]>
1 parent 8b891fb commit f519935

File tree

5 files changed

+49
-26
lines changed

5 files changed

+49
-26
lines changed

cpp/src/compile_spec.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ void to_internal_input_signature(torch::jit::IValue input_ivalue, torch::jit::IV
6363
}
6464
}
6565

66-
torchtrt::core::CompileSpec init_compile_spec(CompileSpec external) {
66+
torchtrt::core::CompileSpec init_compile_spec(CompileSpec& external) {
6767
if (external.graph_inputs.inputs.size() > 0) {
6868
torchtrt::core::CompileSpec internal(to_vec_internal_inputs(external.graph_inputs.inputs));
6969
return internal;
@@ -72,6 +72,25 @@ torchtrt::core::CompileSpec init_compile_spec(CompileSpec external) {
7272
LOG_WARNING( "Input signature parsing is an experimental feature, behavior and APIs may change");
7373
to_internal_input_signature(external.graph_inputs.input_signature, converted_input_signature);
7474
torchtrt::core::CompileSpec internal(converted_input_signature);
75+
76+
TORCHTRT_CHECK(!external.require_full_compilation, \
77+
"Grouped inputs currently requires partial compilation to be enabled, \
78+
this restriction will be relaxed in a future release");
79+
80+
LOG_DEBUG("Grouped inputs currently requires additional settings to enable the feature");
81+
LOG_DEBUG("Adding the following ops to torch_executed_ops:" \
82+
<< std::endl << " - aten::__getitem__" \
83+
<< std::endl << " - prim::ListConstruct" \
84+
<< std::endl << " - prim::ListUnpack" \
85+
<< std::endl << " - prim::TupleIndex" \
86+
<< std::endl << " - prim::TupleConstruct" \
87+
<< std::endl << " - prim::TupleUnpack");
88+
external.torch_executed_ops.push_back("aten::__getitem__");
89+
external.torch_executed_ops.push_back("prim::ListConstruct");
90+
external.torch_executed_ops.push_back("prim::ListUnpack");
91+
external.torch_executed_ops.push_back("prim::TupleIndex");
92+
external.torch_executed_ops.push_back("prim::TupleConstruct");
93+
external.torch_executed_ops.push_back("prim::TupleUnpack");
7594
return internal;
7695
}
7796
}

py/torch_tensorrt/ts/_compile_spec.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from torch_tensorrt.logging import Level, log
99
from typing import Tuple, List, Dict
1010
import warnings
11+
from copy import deepcopy
1112

1213

1314
def _internal_input_to_torch_class_input(i: _C.Input) -> torch.classes.tensorrt._Input:
@@ -188,7 +189,9 @@ def _parse_input_signature(input_signature: Any):
188189
else:
189190
raise KeyError("Input signature contains an unsupported type {}".format(type(input_signature)))
190191

191-
def _parse_compile_spec(compile_spec: Dict[str, Any]) -> _ts_C.CompileSpec:
192+
def _parse_compile_spec(compile_spec_: Dict[str, Any]) -> _ts_C.CompileSpec:
193+
# TODO: Remove deep copy once collections does not need partial compilation
194+
compile_spec = deepcopy(compile_spec_)
192195
info = _ts_C.CompileSpec()
193196

194197
if len(compile_spec["inputs"]) > 0:
@@ -204,6 +207,25 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> _ts_C.CompileSpec:
204207
signature = _parse_input_signature(compile_spec["input_signature"])
205208
info.input_signature = _C.InputSignature(signature) # py_object
206209

210+
if not compile_spec["torch_fallback"]["enabled"]:
211+
raise ValueError("Grouped inputs currently requires partial compilation to be enabled, this restriction will be relaxed in a future release")
212+
213+
log(Level.Debug, "Grouped inputs currently requires additional settings to enable the feature")
214+
log(Level.Debug, """Adding the following ops to torch_executed_ops:
215+
- aten::__getitem__
216+
- prim::ListConstruct
217+
- prim::ListUnpack
218+
- prim::TupleIndex
219+
- prim::TupleConstruct
220+
- prim::TupleUnpack
221+
""")
222+
compile_spec["torch_fallback"]["forced_fallback_ops"].append("aten::__getitem__")
223+
compile_spec["torch_fallback"]["forced_fallback_ops"].append("prim::ListConstruct")
224+
compile_spec["torch_fallback"]["forced_fallback_ops"].append("prim::ListUnpack")
225+
compile_spec["torch_fallback"]["forced_fallback_ops"].append("prim::TupleIndex")
226+
compile_spec["torch_fallback"]["forced_fallback_ops"].append("prim::TupleConstruct")
227+
compile_spec["torch_fallback"]["forced_fallback_ops"].append("prim::TupleUnpack")
228+
207229
else:
208230
raise KeyError(
209231
"Module input definitions are requried to compile module. Provide a list of torch_tensorrt.Input keyed to \"inputs\" in the compile spec"

py/torch_tensorrt/ts/_compiler.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,7 @@ def compile(module: torch.jit.ScriptModule,
103103

104104
if require_full_compilation and (len(torch_executed_modules) > 0 or len(torch_executed_ops) > 0):
105105
raise ValueError(
106-
"require_full_compilation is enabled however the list of modules and ops to run in torch is not empty. Found: torch_executed_ops: "
107-
+ torch_executed_ops + ", torch_executed_modules: " + torch_executed_modules)
106+
f"require_full_compilation is enabled however the list of modules and ops to run in torch is not empty. Found: torch_executed_ops: {torch_executed_ops}, torch_executed_modules: {torch_executed_modules}")
108107

109108
spec = {
110109
"inputs": inputs,

tests/cpp/test_collections.cpp

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ TEST(CppAPITests, TestCollectionStandardTensorInput) {
3434
input_range.push_back({in0.sizes(), torch::kF16});
3535
input_range.push_back({in0.sizes(), torch::kF16});
3636
torch_tensorrt::ts::CompileSpec compile_settings(input_range);
37-
compile_settings.require_full_compilation = true;
3837
compile_settings.min_block_size = 1;
3938

4039
// // FP16 execution
@@ -78,7 +77,6 @@ TEST(CppAPITests, TestCollectionTupleInput) {
7877
torch::jit::IValue complex_input_shape2(input_tuple2);
7978

8079
auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2);
81-
compile_settings.require_full_compilation = true;
8280
compile_settings.min_block_size = 1;
8381

8482
// // FP16 execution
@@ -136,7 +134,6 @@ TEST(CppAPITests, TestCollectionListInput) {
136134
torch::jit::IValue complex_input_shape2(input_tuple2);
137135

138136
auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2);
139-
compile_settings.require_full_compilation = true;
140137
compile_settings.min_block_size = 1;
141138
//compile_settings.torch_executed_ops.push_back("aten::__getitem__");
142139

@@ -184,7 +181,6 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) {
184181
// torch::jit::IValue complex_input_shape(list);
185182

186183
auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2);
187-
compile_settings.require_full_compilation = true;
188184
compile_settings.min_block_size = 1;
189185

190186
// compile_settings.torch_executed_ops.push_back("prim::TupleConstruct");
@@ -248,12 +244,8 @@ TEST(CppAPITests, TestCollectionListInputOutput) {
248244
torch::jit::IValue complex_input_shape2(input_tuple2);
249245

250246
auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2);
251-
compile_settings.require_full_compilation = true;
252247
compile_settings.min_block_size = 1;
253248

254-
// Need to skip the conversion of __getitem__ and ListConstruct
255-
//compile_settings.torch_executed_ops.push_back("aten::__getitem__");
256-
257249
// // FP16 execution
258250
compile_settings.enabled_precisions = {torch::kHalf};
259251
// // Compile module
@@ -313,12 +305,8 @@ TEST(CppAPITests, TestCollectionComplexModel) {
313305
torch::jit::IValue complex_input_shape2(input_tuple2);
314306

315307
auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2);
316-
compile_settings.require_full_compilation = true;
317308
compile_settings.min_block_size = 1;
318309

319-
// Need to skip the conversion of __getitem__ and ListConstruct
320-
//compile_settings.torch_executed_ops.push_back("aten::__getitem__");
321-
322310
// // FP16 execution
323311
compile_settings.enabled_precisions = {torch::kHalf};
324312
// // Compile module

tests/py/api/test_collections.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,7 @@ def test_compile(self):
4848
"input_signature": ((torchtrt.Input(self.input.shape), torchtrt.Input(self.input.shape)),),
4949
"device": torchtrt.Device("gpu:0"),
5050
"enabled_precisions": {torch.float},
51-
"require_full_compilation": False,
52-
"min_block_size": 3
51+
"min_block_size": 1
5352
}
5453

5554
trt_mod = torchtrt.ts.compile(self.model, **compile_spec)
@@ -69,8 +68,7 @@ def test_compile(self):
6968
"input_signature": ([torchtrt.Input(self.input.shape), torchtrt.Input(self.input.shape)],),
7069
"device": torchtrt.Device("gpu:0"),
7170
"enabled_precisions": {torch.float},
72-
"require_full_compilation": False,
73-
"min_block_size": 3
71+
"min_block_size": 1
7472
}
7573

7674
trt_mod = torchtrt.ts.compile(self.model, **compile_spec)
@@ -89,8 +87,7 @@ def test_compile(self):
8987
"input_signature": ((torchtrt.Input(self.input.shape), torchtrt.Input(self.input.shape)),),
9088
"device": torchtrt.Device("gpu:0"),
9189
"enabled_precisions": {torch.float},
92-
"require_full_compilation": False,
93-
"min_block_size": 3
90+
"min_block_size": 1
9491
}
9592

9693
trt_mod = torchtrt.ts.compile(self.model, **compile_spec)
@@ -111,8 +108,7 @@ def test_compile(self):
111108
"input_signature": ([torchtrt.Input(self.input.shape), torchtrt.Input(self.input.shape)],),
112109
"device": torchtrt.Device("gpu:0"),
113110
"enabled_precisions": {torch.float},
114-
"require_full_compilation": False,
115-
"min_block_size": 3
111+
"min_block_size": 1
116112
}
117113

118114
trt_mod = torchtrt.ts.compile(self.model, **compile_spec)
@@ -134,8 +130,7 @@ def test_compile(self):
134130
"input_signature": ([torchtrt.Input(self.input.shape), torchtrt.Input(self.input.shape)],),
135131
"device": torchtrt.Device("gpu:0"),
136132
"enabled_precisions": {torch.float},
137-
"require_full_compilation": False,
138-
"min_block_size": 3
133+
"min_block_size": 1
139134
}
140135

141136
trt_mod = torchtrt.ts.compile(self.model, **compile_spec)

0 commit comments

Comments
 (0)