chore: Rename variables

keehyuna · keehyuna · commit 40f3eaf28f3c · 2024-12-10T22:57:53.000+09:00
diff --git a/core/runtime/TRTEngine.cpp b/core/runtime/TRTEngine.cpp
@@ -99,8 +99,8 @@ TRTEngine::TRTEngine(
   exec_ctx = make_trt(cuda_engine->createExecutionContext());
   TORCHTRT_CHECK((exec_ctx.get() != nullptr), "Unable to create TensorRT execution context");
 
-  runtime_states.prev_cudagraphs_enabled = CUDAGRAPHS_MODE;
-  runtime_states.prev_pre_allocated_outputs_enabled = false;
+  runtime_states.old_cudagraphs = CUDAGRAPHS_MODE;
+  runtime_states.old_pre_allocated_outputs = false;
 
   if (_in_binding_names.size() == 0 && _out_binding_names.size() == 0) {
     uint64_t inputs = 0;
diff --git a/core/runtime/TRTEngine.h b/core/runtime/TRTEngine.h
@@ -30,34 +30,30 @@ using FlattenedState = std::tuple<
     std::tuple<std::string, std::string>, // serialized metadata
     std::tuple<std::string, std::string>>; // Platform
 
-struct RuntimeStates {
-  bool need_cudagraphs_record;
-  bool can_use_pre_allocated_outputs;
-};
-
 struct TorchTRTRuntimeStates {
-  // Previous runtime states
-  bool prev_cudagraphs_enabled, prev_pre_allocated_outputs_enabled;
+  // Indicates whether CUDAGraphs were enabled in the previous execute_engine call
+  bool old_cudagraphs;
+  // Indicates whether pre-allocated outputs were enabled in the previous execute_engine call
+  bool old_pre_allocated_outputs;
 
   // Evaluates whether certain conditions are met to enable CUDA Graph recording or to reuse pre-allocated outputs
   // based on the current and previous states, as well as input shape has changed
-  RuntimeStates validate_states(bool cudagraphs_enabled, bool pre_allocated_outputs_enabled, bool shape_changed) {
+  std::tuple<bool, bool> set_runtime_states(bool new_cudagraphs, bool new_pre_allocated_output, bool shape_changed) {
     bool need_cudagraphs_record = false;
     bool can_use_pre_allocated_outputs = false;
 
     // Cudagraphs record is required if cudagraphs_enabled is switched to True regardless of shape change
-    if (cudagraphs_enabled && (!prev_cudagraphs_enabled || shape_changed)) {
+    if (new_cudagraphs && (!old_cudagraphs || shape_changed)) {
       need_cudagraphs_record = true;
     }
     // Pre-allocated output can be used when previous and current state are true without shape change
-    if (prev_pre_allocated_outputs_enabled && pre_allocated_outputs_enabled && !shape_changed) {
+    if (old_pre_allocated_outputs && new_pre_allocated_output && !shape_changed) {
       can_use_pre_allocated_outputs = true;
     }
-    prev_cudagraphs_enabled = cudagraphs_enabled;
-    prev_pre_allocated_outputs_enabled = pre_allocated_outputs_enabled;
+    old_cudagraphs = new_cudagraphs;
+    old_pre_allocated_outputs = new_pre_allocated_output;
 
-    RuntimeStates values = {need_cudagraphs_record, can_use_pre_allocated_outputs};
-    return values;
+    return {need_cudagraphs_record, can_use_pre_allocated_outputs};
   }
 };
 
diff --git a/core/runtime/execute_engine.cpp b/core/runtime/execute_engine.cpp
@@ -203,9 +203,11 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
   bool shape_changed = _validate_shapes(inputs, compiled_engine);
 
   // Whether cudagraphs needs to record the graph on this pass
-  RuntimeStates states = compiled_engine->runtime_states.validate_states(
+  auto result = compiled_engine->runtime_states.set_runtime_states(
       CUDAGRAPHS_MODE, compiled_engine->use_pre_allocated_outputs, shape_changed);
-  bool need_cudagraphs_record = states.need_cudagraphs_record;
+
+  bool need_cudagraphs_record = std::get<0>(result);
+  bool can_use_pre_allocated_outputs = std::get<1>(result);
 
   if (!CUDAGRAPHS_MODE || shape_changed) {
     compiled_engine->cudagraph.reset();
@@ -288,7 +290,7 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
       output_profiler_guard =
           std::make_unique<torch::autograd::profiler::RecordProfile>(compiled_engine->output_profile_path);
     }
-    if (states.can_use_pre_allocated_outputs) {
+    if (can_use_pre_allocated_outputs) {
       outputs = compiled_engine->pre_allocated_outputs;
     } else {
       outputs = create_output_tensors(compiled_engine);
diff --git a/examples/dynamo/pre_allocated_output_example.py b/examples/dynamo/pre_allocated_output_example.py
@@ -88,9 +88,11 @@ def test_module_perf(model, *input):
 # Enable/Disable pre-allocated output buffer feature using runtime api
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-# We can enable the pre-allocated output buffer with a context manager
+# Enable pre-allocated output buffer using a context manager
 with torch_tensorrt.runtime.enable_pre_allocated_outputs(optimized_model):
     out_trt = optimized_model(*inputs)
+    # Subsequent inferences can use the pre-allocated output buffer (no shape change)
+    out_trt = optimized_model(*inputs)
 
 # Alternatively, we can enable the feature using a context object
 pre_allocated_output_ctx = torch_tensorrt.runtime.enable_pre_allocated_outputs(
diff --git a/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py b/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py
@@ -24,14 +24,16 @@
 
 
 class TorchTRTRuntimeStates:
-    def __init__(self, cudagraphs_enabled: bool, pre_allocated_outputs_enabled: bool):
-        self.prev_cudagraphs_enabled = cudagraphs_enabled
-        self.prev_pre_allocated_outputs_enabled = pre_allocated_outputs_enabled
+    def __init__(self, new_cudagraphs: bool, new_pre_allocated_output: bool):
+        # Indicates whether CUDAGraphs were enabled in the previous execute_engine call
+        self.old_cudagraphs = new_cudagraphs
+        # Indicates whether pre-allocated outputs were enabled in the previous execute_engine call
+        self.old_pre_allocated_outputs = new_pre_allocated_output
 
     def validate_states(
         self,
-        cudagraphs_enabled: bool,
-        pre_allocated_outputs_enabled: bool,
+        new_cudagraphs: bool,
+        new_pre_allocated_output: bool,
         shape_changed: bool,
     ) -> Tuple[bool, bool]:
         # Evaluates whether certain conditions are met to enable CUDA Graph recording or to reuse pre-allocated outputs
@@ -40,19 +42,19 @@ def validate_states(
         can_use_pre_allocated_outputs = False
 
         # Cudagraphs record is required if cudagraphs_enabled is switched to True regardless of shape change
-        if cudagraphs_enabled and (not self.prev_cudagraphs_enabled or shape_changed):
+        if new_cudagraphs and (not self.old_cudagraphs or shape_changed):
             need_cudagraphs_record = True
 
         # Pre-allocated output can be used when previous and current state are true without shape change
         if (
-            self.prev_pre_allocated_outputs_enabled
-            and pre_allocated_outputs_enabled
+            self.old_pre_allocated_outputs
+            and new_pre_allocated_output
             and (not shape_changed)
         ):
             can_use_pre_allocated_outputs = True
 
-        self.prev_cudagraphs_enabled = cudagraphs_enabled
-        self.prev_pre_allocated_outputs_enabled = pre_allocated_outputs_enabled
+        self.old_cudagraphs = new_cudagraphs
+        self.old_pre_allocated_outputs = new_pre_allocated_output
 
         return need_cudagraphs_record, can_use_pre_allocated_outputs