Skip to content

Commit d0ef3cd

Browse files
committed
chore: setting for test
1 parent 65ea0b1 commit d0ef3cd

File tree

5 files changed

+9
-11
lines changed

5 files changed

+9
-11
lines changed

core/runtime/TRTEngine.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -296,10 +296,6 @@ int64_t TRTEngine::get_automatic_device_memory_budget() {
296296
return cuda_engine->getWeightStreamingAutomaticBudget();
297297
}
298298

299-
void TRTEngine::set_pre_allocated_outputs(bool enable) {
300-
use_pre_allocated_outputs = enable;
301-
}
302-
303299
std::string TRTEngine::to_str() const {
304300
// clang-format off
305301
std::stringstream ss;

core/runtime/execute_engine.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
#include "torch/csrc/jit/runtime/custom_operator.h"
66
#include "torch/torch.h"
77

8-
#include <ATen/record_function.h>
98
#include "core/runtime/TRTEngineProfiler.h"
109
#include "core/runtime/runtime.h"
1110
#include "core/util/prelude.h"
@@ -200,7 +199,6 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
200199

201200
{ // Input Setup
202201
std::unique_ptr<torch::autograd::profiler::RecordProfile> input_profiler_guard;
203-
RECORD_FUNCTION("process input", std::vector<c10::IValue>());
204202
if (compiled_engine->profile_execution) {
205203
input_profiler_guard =
206204
std::make_unique<torch::autograd::profiler::RecordProfile>(compiled_engine->input_profile_path);
@@ -282,7 +280,6 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
282280

283281
{ // Output Setup
284282
std::unique_ptr<torch::autograd::profiler::RecordProfile> output_profiler_guard;
285-
RECORD_FUNCTION("process output", std::vector<c10::IValue>());
286283
if (compiled_engine->profile_execution) {
287284
output_profiler_guard =
288285
std::make_unique<torch::autograd::profiler::RecordProfile>(compiled_engine->output_profile_path);
@@ -331,7 +328,6 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
331328
std::unique_lock<std::mutex> lock(compiled_engine->mu);
332329

333330
{ // Engine Execution (execute on engine stream)
334-
RECORD_FUNCTION("Trt runtime", std::vector<c10::IValue>());
335331
c10::cuda::CUDAStreamGuard stream_guard(compiled_engine->engine_stream);
336332

337333
std::unique_ptr<torch::autograd::profiler::RecordProfile> enqueue_profiler_guard;

core/runtime/register_jit_hooks.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ static auto TORCHTRT_UNUSED TRTEngineTSRegistrtion =
8686
.def("dump_engine_layer_info_to_file", &TRTEngine::dump_engine_layer_info_to_file)
8787
.def("dump_engine_layer_info", &TRTEngine::dump_engine_layer_info)
8888
.def("get_engine_layer_info", &TRTEngine::get_engine_layer_info)
89-
.def("set_pre_allocated_outputs", &TRTEngine::set_pre_allocated_outputs)
89+
.def_readwrite("use_pre_allocated_outputs", &TRTEngine::use_pre_allocated_outputs)
9090
.def_property(
9191
"device_memory_budget",
9292
&TRTEngine::get_device_memory_budget,

py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def __init__(
109109
self.target_platform = Platform.current_platform()
110110
self.cudagraphs_enabled = False
111111
self.pre_allocated_outputs: List[torch.Tensor] = []
112-
self.use_pre_allocated_outputs = False
112+
self.use_pre_allocated_outputs = True
113113

114114
if self.serialized_engine is not None and not self.settings.lazy_engine_init:
115115
self.setup_engine()
@@ -248,6 +248,9 @@ def create_output_tensors(self) -> List[torch.Tensor]:
248248
outputs.append(output)
249249
return outputs
250250

251+
def set_output_opt(self, enable: bool) -> None:
252+
self.use_pre_allocated_outputs = enable
253+
251254
def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, ...]:
252255
# Ensure inputs are available in all scopes and cast symbolic integers to Tensors
253256
contiguous_inputs: List[torch.Tensor] = [

py/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ def setup_engine(self) -> None:
203203
if self.engine is not None:
204204
return
205205
self.engine = torch.classes.tensorrt.Engine(self._pack_engine_info())
206-
self.engine.set_pre_allocated_outputs(True)
206+
self.set_output_opt(True)
207207

208208
def encode_metadata(self, metadata: Any) -> str:
209209
metadata = copy.deepcopy(metadata)
@@ -268,6 +268,9 @@ def set_extra_state(self, state: SerializedTorchTensorRTModuleFmt) -> None:
268268
self.input_binding_names = state[2]
269269
self.output_binding_names = state[3]
270270

271+
def set_output_opt(self, enable: bool) -> None:
272+
self.engine.use_pre_allocated_outputs = enable
273+
271274
def forward(self, *inputs: Any) -> torch.Tensor | Tuple[torch.Tensor, ...]:
272275
"""Implementation of the forward pass for a TensorRT engine
273276

0 commit comments

Comments
 (0)