
Commit 91c93f5

feat: Safety Mode for Runtime
- Add safety mode for Torch-TensorRT runtime
- Add C++ TorchBind bindings and relevant lambda functions to get and set necessary attributes
- Add runtime augmentations to support different modes
- Add testing for safe mode settings
1 parent 867dc7b commit 91c93f5
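In user code, the new toggles work roughly as follows. This is a minimal sketch using only the APIs added in this commit (safe mode is the default, per the `runtime.cpp` change below):

import torch
import torch_tensorrt

# Safe mode is on by default; turning it off skips the runtime safety checks
torch_tensorrt.enable_unsafe_inference_mode()
assert not torch.ops.tensorrt.get_safe_mode()

# ... latency-sensitive inference here ...

# Restore the default behavior
torch_tensorrt.enable_safe_inference_mode()
assert torch.ops.tensorrt.get_safe_mode()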

File tree: 7 files changed, +87 −4 lines

core/runtime/execute_engine.cpp

Lines changed: 1 addition & 1 deletion
@@ -74,7 +74,7 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
     LOG_INFO("" << log_info);
   }
 
-  {
+  if (SAFE_MODE) {
     std::unique_ptr<torch::autograd::profiler::RecordProfile> device_profiler_guard;
     if (compiled_engine->profile_execution) {
       device_profiler_guard =
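The checks wrapped by this `if (SAFE_MODE)` guard run on every engine execution, so one way to observe the toggle is to time the same compiled module in both modes. A rough sketch, reusing the sample model from the new test at the bottom of this commit; any measured difference is machine-dependent and illustrative only:

import time

import torch
import torch_tensorrt


class SampleModel(torch.nn.Module):
    def forward(self, x):
        return torch.softmax((x + 2) * 7, dim=0)


inputs = [torch.randn(3, 5, 7).cuda()]
trt_mod = torch_tensorrt.compile(
    torch.fx.symbolic_trace(SampleModel()),
    "torch_compile",
    inputs,
    min_block_size=1,
)


def mean_latency(mod, iterations=100):
    # Synchronize so the timed window covers the enqueued GPU work
    torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in range(iterations):
        mod(*inputs)
    torch.cuda.synchronize()
    return (time.perf_counter() - start) / iterations


torch_tensorrt.enable_safe_inference_mode()
safe = mean_latency(trt_mod)

torch_tensorrt.enable_unsafe_inference_mode()
unsafe = mean_latency(trt_mod)

print(f"safe: {safe * 1e6:.1f} us/iter, unsafe: {unsafe * 1e6:.1f} us/iter")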

core/runtime/register_jit_hooks.cpp

Lines changed: 2 additions & 0 deletions
@@ -114,6 +114,8 @@ TORCH_LIBRARY(tensorrt, m) {
   m.def("execute_engine", execute_engine);
   m.def("SERIALIZED_ENGINE_BINDING_DELIM", []() -> std::string { return std::string(1, TRTEngine::BINDING_DELIM); });
   m.def("ABI_VERSION", []() -> std::string { return ABI_VERSION; });
+  m.def("get_safe_mode", []() -> bool { return SAFE_MODE; });
+  m.def("set_safe_mode", [](bool safe_mode) -> void { SAFE_MODE = safe_mode; });
 }
 
 } // namespace
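Because these accessors are registered on the `tensorrt` Torch library, they are callable straight from the op registry, independent of the Python wrappers added in `_compile.py`. A small sketch:

import torch
import torch_tensorrt  # importing the package loads the C++ runtime and registers the ops

# Backed by the lambdas registered above
torch.ops.tensorrt.set_safe_mode(False)
print(torch.ops.tensorrt.get_safe_mode())  # False
torch.ops.tensorrt.set_safe_mode(True)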

core/runtime/runtime.cpp

Lines changed: 2 additions & 0 deletions
@@ -7,6 +7,8 @@ namespace torch_tensorrt {
 namespace core {
 namespace runtime {
 
+bool SAFE_MODE = true;
+
 c10::optional<RTDevice> get_most_compatible_device(const RTDevice& target_device, const RTDevice& curr_device) {
   LOG_DEBUG("Target Device: " << target_device);
   auto device_options = find_compatible_devices(target_device);
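Since `SAFE_MODE` is initialized to `true`, a fresh process starts out in safe mode. A quick sanity check of that default:

import torch
import torch_tensorrt

# Reflects the `bool SAFE_MODE = true;` default above
assert torch.ops.tensorrt.get_safe_mode()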

core/runtime/runtime.h

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ namespace runtime {
 
 using EngineID = int64_t;
 const std::string ABI_VERSION = "4";
+extern bool SAFE_MODE;
 typedef enum {
   ABI_TARGET_IDX = 0,
   NAME_IDX,

py/torch_tensorrt/__init__.py

Lines changed: 8 additions & 3 deletions
@@ -82,18 +82,23 @@ def _find_lib(name: str, paths: List[str]) -> str:
 
 import torch
 from torch_tensorrt._compile import *  # noqa: F403
+from torch_tensorrt._compile import (
+    enable_safe_inference_mode,
+    enable_unsafe_inference_mode,
+)
 from torch_tensorrt._Device import Device  # noqa: F401
 from torch_tensorrt._enums import *  # noqa: F403
 from torch_tensorrt._Input import Input  # noqa: F401
-from torch_tensorrt.logging import *
-from torch_tensorrt.ptq import *
 from torch_tensorrt._utils import *  # noqa: F403
 from torch_tensorrt._utils import sanitized_torch_version
+from torch_tensorrt.logging import *
+from torch_tensorrt.ptq import *
 
 if version.parse(sanitized_torch_version()) >= version.parse("2.1.dev"):
-    from torch_tensorrt import dynamo  # noqa: F401
     from torch_tensorrt.dynamo import backend  # noqa: F401
 
+    from torch_tensorrt import dynamo  # noqa: F401
+
 
 def _register_with_torch() -> None:
     trtorch_dir = os.path.dirname(__file__)

py/torch_tensorrt/_compile.py

Lines changed: 16 additions & 0 deletions
@@ -256,6 +256,22 @@ def torch_compile(module: torch.nn.Module, **kwargs: Any) -> Any:
     return boxed_fn
 
 
+def enable_unsafe_inference_mode():
+    """
+    Enables unsafe inference mode for Torch-TensorRT
+    """
+    torch.ops.tensorrt.set_safe_mode(False)
+    logger.info("Enabled unsafe inference mode")
+
+
+def enable_safe_inference_mode():
+    """
+    Enables safe inference mode for Torch-TensorRT
+    """
+    torch.ops.tensorrt.set_safe_mode(True)
+    logger.info("Enabled safe inference mode")
+
+
 def convert_method_to_trt_engine(
     module: Any,
     method_name: str = "forward",
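Because the two helpers simply flip a process-wide flag, a caller that wants to scope unsafe mode to one region of code could wrap them in a context manager that restores the prior setting. The `unsafe_inference_mode` helper below is hypothetical (not part of this commit), sketched on top of the functions above:

from contextlib import contextmanager

import torch
import torch_tensorrt


@contextmanager
def unsafe_inference_mode():
    """Hypothetical helper: enter unsafe mode, then restore the previous setting."""
    was_safe = torch.ops.tensorrt.get_safe_mode()
    torch_tensorrt.enable_unsafe_inference_mode()
    try:
        yield
    finally:
        if was_safe:
            torch_tensorrt.enable_safe_inference_mode()


# Usage: safety checks are skipped only inside the block
# with unsafe_inference_mode():
#     outputs = trt_mod(*inputs)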
New test file for the safe mode settings

Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
+import torch
+from torch.testing._internal.common_utils import TestCase, run_tests
+
+import torch_tensorrt
+
+
+class TestSafeMode(TestCase):
+    def test_safe_mode_enabled(self):
+        torch_tensorrt.enable_safe_inference_mode()
+        self.assertTrue(torch.ops.tensorrt.get_safe_mode())
+
+    def test_unsafe_mode_enabled(self):
+        torch_tensorrt.enable_unsafe_inference_mode()
+        self.assertFalse(torch.ops.tensorrt.get_safe_mode())
+
+    def test_unsafe_mode_enabled_inference(self):
+        torch_tensorrt.enable_unsafe_inference_mode()
+
+        class SampleModel(torch.nn.Module):
+            def forward(self, x):
+                return torch.softmax((x + 2) * 7, dim=0)
+
+        inputs = [
+            torch.randn(
+                3,
+                5,
+                7,
+            ).cuda()
+        ]
+
+        fx_graph = torch.fx.symbolic_trace(SampleModel())
+
+        # Validate that the results between Torch and Torch-TRT are similar
+        optimized_model = torch_tensorrt.compile(
+            fx_graph,
+            "torch_compile",
+            inputs,
+            min_block_size=1,
+            pass_through_build_failures=True,
+            use_python_runtime=True,
+        )
+        optimized_model_results = optimized_model(*inputs).detach().cpu()
+        torch_model_results = fx_graph(*inputs).detach().cpu()
+
+        max_diff = float(
+            torch.max(torch.abs(optimized_model_results - torch_model_results))
+        )
+        self.assertAlmostEqual(
+            max_diff,
+            0,
+            msg="Unsafe Mode TRT outputs don't match with the original model.",
+        )
+        torch._dynamo.reset()
+
+
+if __name__ == "__main__":
+    run_tests()
