
Commit 90dc976

fix: Address review comments
1 parent 60c73d3 commit 90dc976

6 files changed (+41, -37 lines)

core/runtime/TRTEngine.cpp

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@ TRTEngine::TRTEngine(
   auto most_compatible_device = get_most_compatible_device(cuda_device);
   TORCHTRT_CHECK(most_compatible_device, "No compatible device was found for instantiating TensorRT engine");
   device_info = most_compatible_device.value();
-  multi_gpu_device_check(device_info);
+  multi_gpu_device_check();
   set_rt_device(device_info);

   rt = make_trt(nvinfer1::createInferRuntime(util::logging::get_logger()));

core/runtime/runtime.cpp

Lines changed: 3 additions & 5 deletions
@@ -105,16 +105,14 @@ RTDevice get_current_device() {
   return RTDevice(device_id, nvinfer1::DeviceType::kGPU);
 }

-void multi_gpu_device_check(const RTDevice& most_compatible_device) {
+void multi_gpu_device_check() {
   // If multi-device safe mode is disabled and more than 1 device is registered on the machine, warn user
   if (!(MULTI_DEVICE_SAFE_MODE) && get_available_device_list().get_devices().size() > 1) {
     LOG_WARNING(
         "Detected this engine is being instantiated in a multi-GPU system with "
         << "multi-device safe mode disabled. For more on the implications of this "
-        << "as well as workarounds, see MULTI_DEVICE_SAFE_MODE.md "
-        << "(https://github.com/pytorch/TensorRT/blob/main/py/torch_tensorrt/dynamo/runtime/MULTI_DEVICE_SAFE_MODE.md). "
-        << "The engine is set to be instantiated on the cuda device, " << most_compatible_device << ". "
-        << "If this is incorrect, please set the desired cuda device as default and retry.");
+        << "as well as workarounds, see the linked documentation "
+        << "(https://pytorch.org/TensorRT/user_guide/runtime.html#multi-device-safe-mode)");
   }
 }

core/runtime/runtime.h

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ std::vector<RTDevice> find_compatible_devices(const RTDevice& target_device);

 std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intrusive_ptr<TRTEngine> compiled_engine);

-void multi_gpu_device_check(const RTDevice& most_compatible_device);
+void multi_gpu_device_check();

 class DeviceList {
   using DeviceMap = std::unordered_map<int, RTDevice>;

docsrc/user_guide/runtime.rst

Lines changed: 34 additions & 0 deletions
@@ -34,3 +34,37 @@ Plugin Library
 In the case you use Torch-TensorRT as a converter to a TensorRT engine and your engine uses plugins provided by Torch-TensorRT, Torch-TensorRT
 ships the library ``libtorchtrt_plugins.so`` which contains the implementation of the TensorRT plugins used by Torch-TensorRT during
 compilation. This library can be ``DL_OPEN`` or ``LD_PRELOAD`` similar to other TensorRT plugin libraries.
+
+Multi Device Safe Mode
+----------------------
+
+Multi-device safe mode is a setting in Torch-TensorRT which allows the user to determine whether
+the runtime checks for device consistency prior to every inference call.
+
+There is a non-negligible, fixed cost per inference call when multi-device safe mode is enabled, which is why
+it is now disabled by default. It can be controlled via the following convenience function, which
+doubles as a context manager.
+
+.. code-block:: python
+
+    # Enables Multi Device Safe Mode
+    torch_tensorrt.runtime.set_multi_device_safe_mode(True)
+
+    # Disables Multi Device Safe Mode [Default Behavior]
+    torch_tensorrt.runtime.set_multi_device_safe_mode(False)
+
+    # Enables Multi Device Safe Mode, then resets the safe mode to its prior setting
+    with torch_tensorrt.runtime.set_multi_device_safe_mode(True):
+        ...
+
+TensorRT requires that each engine be associated with the CUDA context in the active thread from which it is invoked.
+Therefore, if the device were to change in the active thread, which may be the case when invoking
+engines on multiple GPUs from the same Python process, safe mode will cause Torch-TensorRT to display
+an alert and switch GPUs accordingly. If safe mode is not enabled, there could be a mismatch between the engine
+device and the CUDA context device, which could cause the program to crash.
+
+One technique for managing multiple TRT engines on different GPUs while not sacrificing performance for
+multi-device safe mode is to use Python threads. Each thread is responsible for all of the TRT engines
+on a single GPU, and the default CUDA device on each thread corresponds to the GPU for which it is
+responsible (it can be set via ``torch.cuda.set_device(...)``). In this way, multiple threads can be used in the same
+Python script without needing to switch CUDA contexts and incur performance overhead.
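
To make the threading pattern in the newly added documentation concrete, the following is a minimal sketch, not part of this commit. The name work is a hypothetical placeholder for a mapping from GPU id to the (compiled Torch-TensorRT module, input tensor) pairs owned by that GPU; only the standard threading and torch APIs are assumed.

# Minimal sketch of the per-GPU threading pattern described above (not part of this commit).
# `work` is a hypothetical dict: GPU id -> list of (compiled Torch-TensorRT module, input) pairs.
import threading

import torch


def run_engines_on_gpu(gpu_id, modules_and_inputs):
    # Pin this thread's default CUDA device once, so every engine owned by the
    # thread runs against the same CUDA context without multi-device safe mode.
    torch.cuda.set_device(gpu_id)
    for module, inp in modules_and_inputs:
        module(inp.to(f"cuda:{gpu_id}"))


# One thread per GPU; each thread only ever touches engines on its own device.
threads = [
    threading.Thread(target=run_engines_on_gpu, args=(gpu_id, work[gpu_id]))
    for gpu_id in range(torch.cuda.device_count())
]
for t in threads:
    t.start()
for t in threads:
    t.join()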

py/torch_tensorrt/dynamo/runtime/MULTI_DEVICE_SAFE_MODE.md

Lines changed: 0 additions & 28 deletions
This file was deleted.

py/torch_tensorrt/dynamo/runtime/tools.py

Lines changed: 2 additions & 2 deletions
@@ -17,8 +17,8 @@ def multi_gpu_device_check() -> None:
         logger.warning(
             "Detected this engine is being instantiated in a multi-GPU system with "
             "multi-device safe mode disabled. For more on the implications of this "
-            "as well as workarounds, see MULTI_DEVICE_SAFE_MODE.md "
-            "(https://github.com/pytorch/TensorRT/blob/main/py/torch_tensorrt/dynamo/runtime/MULTI_DEVICE_SAFE_MODE.md). "
+            "as well as workarounds, see the linked documentation "
+            "(https://pytorch.org/TensorRT/user_guide/runtime.html#multi-device-safe-mode). "
             f"The engine is set to be instantiated on the current default cuda device, cuda:{torch.cuda.current_device()}. "
             "If this is incorrect, please set the desired cuda device via torch.cuda.set_device(...) and retry."
         )
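
As a reference for the workaround this warning points to, the sketch below is not part of the commit: it shows the desired device being selected before the engine is instantiated. load_compiled_trt_module is a hypothetical stand-in for however the compiled Torch-TensorRT module is produced or deserialized.

# Sketch of the workaround referenced by the warning above (not part of this commit).
# `load_compiled_trt_module` is a hypothetical callable standing in for however the
# compiled Torch-TensorRT module is obtained.
import torch


def run_on_device(device_id, load_compiled_trt_module, example_input):
    # Select the desired GPU *before* the engine is instantiated, so the
    # "current default cuda device" named in the warning is the intended one.
    torch.cuda.set_device(device_id)
    trt_module = load_compiled_trt_module()
    return trt_module(example_input.to(f"cuda:{device_id}"))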
