Skip to content

Commit 40f3eaf

Browse files
committed
chore: Rename variables
1 parent 7f58048 commit 40f3eaf

File tree

5 files changed

+32
-30
lines changed

5 files changed

+32
-30
lines changed

core/runtime/TRTEngine.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ TRTEngine::TRTEngine(
9999
exec_ctx = make_trt(cuda_engine->createExecutionContext());
100100
TORCHTRT_CHECK((exec_ctx.get() != nullptr), "Unable to create TensorRT execution context");
101101

102-
runtime_states.prev_cudagraphs_enabled = CUDAGRAPHS_MODE;
103-
runtime_states.prev_pre_allocated_outputs_enabled = false;
102+
runtime_states.old_cudagraphs = CUDAGRAPHS_MODE;
103+
runtime_states.old_pre_allocated_outputs = false;
104104

105105
if (_in_binding_names.size() == 0 && _out_binding_names.size() == 0) {
106106
uint64_t inputs = 0;

core/runtime/TRTEngine.h

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,34 +30,30 @@ using FlattenedState = std::tuple<
3030
std::tuple<std::string, std::string>, // serialized metadata
3131
std::tuple<std::string, std::string>>; // Platform
3232

33-
struct RuntimeStates {
34-
bool need_cudagraphs_record;
35-
bool can_use_pre_allocated_outputs;
36-
};
37-
3833
struct TorchTRTRuntimeStates {
39-
// Previous runtime states
40-
bool prev_cudagraphs_enabled, prev_pre_allocated_outputs_enabled;
34+
// Indicates whether CUDAGraphs were enabled in the previous execute_engine
35+
bool old_cudagraphs;
36+
// Indicates whether pre-allocated output was enabled in the previous execute_engine
37+
bool old_pre_allocated_outputs;
4138

4239
// Evaluates whether certain conditions are met to enable CUDA Graph recording or to reuse pre-allocated outputs
4340
// based on the current and previous states, as well as whether the input shape has changed
44-
RuntimeStates validate_states(bool cudagraphs_enabled, bool pre_allocated_outputs_enabled, bool shape_changed) {
41+
std::tuple<bool, bool> set_runtime_states(bool new_cudagraphs, bool new_pre_allocated_output, bool shape_changed) {
4542
bool need_cudagraphs_record = false;
4643
bool can_use_pre_allocated_outputs = false;
4744

4845
// CUDA Graph recording is required when new_cudagraphs is true and either CUDA Graphs were disabled on the previous call or the input shape has changed
49-
if (cudagraphs_enabled && (!prev_cudagraphs_enabled || shape_changed)) {
46+
if (new_cudagraphs && (!old_cudagraphs || shape_changed)) {
5047
need_cudagraphs_record = true;
5148
}
5249
// Pre-allocated outputs can be reused only when the feature was enabled on both the previous and the current call and the input shape has not changed
53-
if (prev_pre_allocated_outputs_enabled && pre_allocated_outputs_enabled && !shape_changed) {
50+
if (old_pre_allocated_outputs && new_pre_allocated_output && !shape_changed) {
5451
can_use_pre_allocated_outputs = true;
5552
}
56-
prev_cudagraphs_enabled = cudagraphs_enabled;
57-
prev_pre_allocated_outputs_enabled = pre_allocated_outputs_enabled;
53+
old_cudagraphs = new_cudagraphs;
54+
old_pre_allocated_outputs = new_pre_allocated_output;
5855

59-
RuntimeStates values = {need_cudagraphs_record, can_use_pre_allocated_outputs};
60-
return values;
56+
return {need_cudagraphs_record, can_use_pre_allocated_outputs};
6157
}
6258
};
6359

core/runtime/execute_engine.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,9 +203,11 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
203203
bool shape_changed = _validate_shapes(inputs, compiled_engine);
204204

205205
// Whether cudagraphs needs to record the graph on this pass
206-
RuntimeStates states = compiled_engine->runtime_states.validate_states(
206+
auto result = compiled_engine->runtime_states.set_runtime_states(
207207
CUDAGRAPHS_MODE, compiled_engine->use_pre_allocated_outputs, shape_changed);
208-
bool need_cudagraphs_record = states.need_cudagraphs_record;
208+
209+
bool need_cudagraphs_record = std::get<0>(result);
210+
bool can_use_pre_allocated_outputs = std::get<1>(result);
209211

210212
if (!CUDAGRAPHS_MODE || shape_changed) {
211213
compiled_engine->cudagraph.reset();
@@ -288,7 +290,7 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
288290
output_profiler_guard =
289291
std::make_unique<torch::autograd::profiler::RecordProfile>(compiled_engine->output_profile_path);
290292
}
291-
if (states.can_use_pre_allocated_outputs) {
293+
if (can_use_pre_allocated_outputs) {
292294
outputs = compiled_engine->pre_allocated_outputs;
293295
} else {
294296
outputs = create_output_tensors(compiled_engine);

examples/dynamo/pre_allocated_output_example.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,11 @@ def test_module_perf(model, *input):
8888
# Enable/Disable pre-allocated output buffer feature using runtime api
8989
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
9090

91-
# We can enable the pre-allocated output buffer with a context manager
91+
# Enable pre-allocated output buffer using a context manager
9292
with torch_tensorrt.runtime.enable_pre_allocated_outputs(optimized_model):
9393
out_trt = optimized_model(*inputs)
94+
# Subsequent inferences can use the pre-allocated output buffer (no shape change)
95+
out_trt = optimized_model(*inputs)
9496

9597
# Alternatively, we can enable the feature using a context object
9698
pre_allocated_output_ctx = torch_tensorrt.runtime.enable_pre_allocated_outputs(

py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,16 @@
2424

2525

2626
class TorchTRTRuntimeStates:
27-
def __init__(self, cudagraphs_enabled: bool, pre_allocated_outputs_enabled: bool):
28-
self.prev_cudagraphs_enabled = cudagraphs_enabled
29-
self.prev_pre_allocated_outputs_enabled = pre_allocated_outputs_enabled
27+
def __init__(self, new_cudagraphs: bool, new_pre_allocated_output: bool):
28+
# Indicates whether CUDAGraphs were enabled in the previous execute_engine
29+
self.old_cudagraphs = new_cudagraphs
30+
# Indicates whether pre-allocated output was enabled in the previous execute_engine
31+
self.old_pre_allocated_outputs = new_pre_allocated_output
3032

3133
def validate_states(
3234
self,
33-
cudagraphs_enabled: bool,
34-
pre_allocated_outputs_enabled: bool,
35+
new_cudagraphs: bool,
36+
new_pre_allocated_output: bool,
3537
shape_changed: bool,
3638
) -> Tuple[bool, bool]:
3739
# Evaluates whether certain conditions are met to enable CUDA Graph recording or to reuse pre-allocated outputs
@@ -40,19 +42,19 @@ def validate_states(
4042
can_use_pre_allocated_outputs = False
4143

4244
# CUDA Graph recording is required when new_cudagraphs is True and either CUDA Graphs were disabled on the previous call or the input shape has changed
43-
if cudagraphs_enabled and (not self.prev_cudagraphs_enabled or shape_changed):
45+
if new_cudagraphs and (not self.old_cudagraphs or shape_changed):
4446
need_cudagraphs_record = True
4547

4648
# Pre-allocated outputs can be reused only when the feature was enabled on both the previous and the current call and the input shape has not changed
4749
if (
48-
self.prev_pre_allocated_outputs_enabled
49-
and pre_allocated_outputs_enabled
50+
self.old_pre_allocated_outputs
51+
and new_pre_allocated_output
5052
and (not shape_changed)
5153
):
5254
can_use_pre_allocated_outputs = True
5355

54-
self.prev_cudagraphs_enabled = cudagraphs_enabled
55-
self.prev_pre_allocated_outputs_enabled = pre_allocated_outputs_enabled
56+
self.old_cudagraphs = new_cudagraphs
57+
self.old_pre_allocated_outputs = new_pre_allocated_output
5658

5759
return need_cudagraphs_record, can_use_pre_allocated_outputs
5860

0 commit comments

Comments
 (0)