Skip to content

Commit b679811

Browse files
committed
chore: settings for test
1 parent 41aef0f commit b679811

File tree

5 files changed

+9
-11
lines changed

5 files changed

+9
-11
lines changed

core/runtime/TRTEngine.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -319,10 +319,6 @@ int64_t TRTEngine::get_automatic_device_memory_budget() {
319319
return cuda_engine->getWeightStreamingAutomaticBudget();
320320
}
321321

322-
void TRTEngine::set_pre_allocated_outputs(bool enable) {
323-
use_pre_allocated_outputs = enable;
324-
}
325-
326322
std::string TRTEngine::to_str() const {
327323
// clang-format off
328324
std::stringstream ss;

core/runtime/execute_engine.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
#include "torch/csrc/jit/runtime/custom_operator.h"
66
#include "torch/torch.h"
77

8-
#include <ATen/record_function.h>
98
#include "core/runtime/TRTEngineProfiler.h"
109
#include "core/runtime/runtime.h"
1110
#include "core/util/prelude.h"
@@ -202,7 +201,6 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
202201

203202
{ // Input Setup
204203
std::unique_ptr<torch::autograd::profiler::RecordProfile> input_profiler_guard;
205-
RECORD_FUNCTION("process input", std::vector<c10::IValue>());
206204
if (compiled_engine->profile_execution) {
207205
input_profiler_guard =
208206
std::make_unique<torch::autograd::profiler::RecordProfile>(compiled_engine->input_profile_path);
@@ -284,7 +282,6 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
284282

285283
{ // Output Setup
286284
std::unique_ptr<torch::autograd::profiler::RecordProfile> output_profiler_guard;
287-
RECORD_FUNCTION("process output", std::vector<c10::IValue>());
288285
if (compiled_engine->profile_execution) {
289286
output_profiler_guard =
290287
std::make_unique<torch::autograd::profiler::RecordProfile>(compiled_engine->output_profile_path);
@@ -330,7 +327,6 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
330327
}
331328

332329
{ // Engine Execution (execute on engine stream)
333-
RECORD_FUNCTION("Trt runtime", std::vector<c10::IValue>());
334330
c10::cuda::CUDAStreamGuard stream_guard(compiled_engine->engine_stream);
335331

336332
std::unique_ptr<torch::autograd::profiler::RecordProfile> enqueue_profiler_guard;

core/runtime/register_jit_hooks.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ static auto TORCHTRT_UNUSED TRTEngineTSRegistrtion =
8888
.def("dump_engine_layer_info", &TRTEngine::dump_engine_layer_info)
8989
.def("get_engine_layer_info", &TRTEngine::get_engine_layer_info)
9090
.def("infer_outputs", &TRTEngine::infer_outputs)
91-
.def("set_pre_allocated_outputs", &TRTEngine::set_pre_allocated_outputs)
91+
.def_readwrite("use_pre_allocated_outputs", &TRTEngine::use_pre_allocated_outputs)
9292
.def_property(
9393
"device_memory_budget",
9494
&TRTEngine::get_device_memory_budget,

py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ def __init__(
110110
self.target_platform = Platform.current_platform()
111111
self.cudagraphs_enabled = False
112112
self.pre_allocated_outputs: List[torch.Tensor] = []
113-
self.use_pre_allocated_outputs = False
113+
self.use_pre_allocated_outputs = True
114114

115115
if self.serialized_engine is not None and not self.settings.lazy_engine_init:
116116
self.setup_engine()
@@ -249,6 +249,9 @@ def create_output_tensors(self) -> List[torch.Tensor]:
249249
outputs.append(output)
250250
return outputs
251251

252+
def set_output_opt(self, enable: bool) -> None:
253+
self.use_pre_allocated_outputs = enable
254+
252255
def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, ...]:
253256
# Ensure inputs are available in all scopes and cast symbolic integers to Tensors
254257
contiguous_inputs: List[torch.Tensor] = [

py/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def setup_engine(self) -> None:
208208
if self.engine is not None:
209209
return
210210
self.engine = torch.classes.tensorrt.Engine(self._pack_engine_info())
211-
self.engine.set_pre_allocated_outputs(True)
211+
self.set_output_opt(True)
212212

213213
def encode_metadata(self, metadata: Any) -> str:
214214
metadata = copy.deepcopy(metadata)
@@ -273,6 +273,9 @@ def set_extra_state(self, state: SerializedTorchTensorRTModuleFmt) -> None:
273273
self.input_binding_names = state[2]
274274
self.output_binding_names = state[3]
275275

276+
def set_output_opt(self, enable: bool) -> None:
277+
self.engine.use_pre_allocated_outputs = enable
278+
276279
def forward(self, *inputs: Any) -> torch.Tensor | Tuple[torch.Tensor, ...]:
277280
"""Implementation of the forward pass for a TensorRT engine
278281

0 commit comments

Comments
 (0)