15 files changed: +491 additions, −375 deletions.
First CI workflow file — the release-candidate tag trigger block is re-indented (a whitespace-only change, exact indentation not recoverable from the diff view), and the dynamo runtime CUDA Graphs tests are split out of the parallel pytest run into their own serial invocations:

```diff
@@ -8,9 +8,9 @@
       - nightly
       - release/*
     tags:
-       # NOTE: Binary build pipelines should only get triggered on release candidate builds
-       # Release candidate tags look like: v1.11.0-rc1
-       - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
+      # NOTE: Binary build pipelines should only get triggered on release candidate builds
+      # Release candidate tags look like: v1.11.0-rc1
+      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
   workflow_dispatch:

 jobs:
@@ -229,7 +229,9 @@ jobs:
           export USE_HOST_DEPS=1
           pushd .
           cd tests/py/dynamo
-          python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml runtime/
+          python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore test_cudagraphs_py.py --ignore test_cudagraphs_cpp.py runtime/
+          python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results_cuda_graphs_cpp.xml runtime/test_cudagraphs_cpp.py
+          python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results_cuda_graphs_py.xml runtime/test_cudagraphs_py.py
           python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/
           python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/
           popd
```
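A plausible reason for the split: CUDA graph capture is fragile under concurrency. Between `capture_begin()` and `capture_end()`, unrelated CUDA activity on the capturing device — exactly what pytest-xdist workers (`-n 4`) produce — can invalidate or pollute the capture, so the CUDA Graphs tests run serially. A minimal sketch of the capture pattern involved, using only stock libtorch APIs (illustrative; not the test code itself):

```cpp
// Sketch: capturing and replaying a CUDA graph with libtorch.
// Capture must run on a non-default stream, and stray work hitting the
// device mid-capture can break it -- hence running such tests serially
// rather than under pytest-xdist.
#include <ATen/cuda/CUDAGraph.h>
#include <c10/cuda/CUDAGuard.h>
#include <c10/cuda/CUDAStream.h>
#include <torch/torch.h>

int main() {
  auto x = torch::ones({8}, torch::kCUDA);

  at::cuda::CUDAGraph graph;
  c10::cuda::CUDAStream capture_stream = c10::cuda::getStreamFromPool();
  {
    c10::cuda::CUDAStreamGuard guard(capture_stream); // capture off the default stream
    graph.capture_begin();
    x = x + 1; // recorded into the graph, not executed eagerly
    graph.capture_end();
  }

  graph.replay(); // launches the captured work
  torch::cuda::synchronize();
}
```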
Second CI workflow file — the same whitespace-only re-indentation of the tag trigger block; here the dynamo runtime tests simply drop pytest-xdist parallelism (`-n 4`):

```diff
@@ -8,9 +8,9 @@
       - nightly
       - release/*
     tags:
-       # NOTE: Binary build pipelines should only get triggered on release candidate builds
-       # Release candidate tags look like: v1.11.0-rc1
-       - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
+      # NOTE: Binary build pipelines should only get triggered on release candidate builds
+      # Release candidate tags look like: v1.11.0-rc1
+      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
   workflow_dispatch:

 jobs:
@@ -219,7 +219,7 @@ jobs:
           export USE_HOST_DEPS=1
           pushd .
           cd tests/py/dynamo
-          python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml runtime/
+          python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml runtime/
           python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/
           python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/
           popd
```
`Var::isITensorList` / `Var::unwrapToITensorList` — the loop indices switch from `int` to `size_t` to match the container's size type:

```diff
@@ -153,7 +153,7 @@ bool Var::isITensorList() {
   // Unpack the Var as a List and check if each entry is a custom class since
   // ITensors are stored in CustomClassHolder
   auto ival_list = ptr_.ivalue->toList();
-  for (int i = 0; i < ival_list.size(); i++) {
+  for (size_t i = 0; i < ival_list.size(); i++) {
     if (!ival_list.get(i).isCustomClass()) {
       return false;
     }
@@ -167,7 +167,7 @@ std::vector<nvinfer1::ITensor*> Var::unwrapToITensorList() {
   TORCHTRT_CHECK(isITensorList(), "Expected IValue to be an ITensorList");
   auto ivalue_list = ptr_.ivalue->toList();
   std::vector<nvinfer1::ITensor*> outputs;
-  for (int i = 0; i < ivalue_list.size(); i++) {
+  for (size_t i = 0; i < ivalue_list.size(); i++) {
     auto element = ivalue_list.get(i).toCustomClass<TensorContainer>()->tensor();
     outputs.push_back(std::move(element));
   }
```
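The change silences `-Wsign-compare`: the list's `size()` returns an unsigned `size_t`, so comparing it against a signed `int` index warns under `-Wall -Wextra`. The same pattern in miniature, with a plain `std::vector` standing in for the `c10::List` used above:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> v{1, 2, 3};

  // for (int i = 0; i < v.size(); i++)  // warns: comparison of integer
  //                                     // expressions of different signedness

  // Matching the container's size type avoids the mixed-sign comparison:
  for (size_t i = 0; i < v.size(); i++) {
    std::cout << v[i] << '\n';
  }
}
```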
`TRTEngine` implementation — the constructor no longer forces a non-default active stream (stream selection moves out of construction), and `set_profiling_paths()` gains a CUDA graph debug output path:

```diff
@@ -71,15 +71,6 @@ TRTEngine::TRTEngine(
   multi_gpu_device_check();
   set_rt_device(device_info);

-  // Set active stream to non-default stream
-  auto current_stream = c10::cuda::getCurrentCUDAStream(device_info.id);
-  if (current_stream == c10::cuda::getDefaultCUDAStream(device_info.id)) {
-    active_stream = c10::cuda::getStreamFromPool(false, device_info.id);
-    c10::cuda::setCurrentCUDAStream(active_stream);
-  } else {
-    active_stream = current_stream;
-  }
-
   rt = make_trt(nvinfer1::createInferRuntime(util::logging::get_logger()));

   name = slugify(mod_name);
@@ -253,6 +244,7 @@ void TRTEngine::set_profiling_paths() {
   enqueue_profile_path = std::filesystem::path{profile_path_prefix + "/" + name + "_enqueue_profile.trace"}.string();
   trt_engine_profile_path =
       std::filesystem::path{profile_path_prefix + "/" + name + "_engine_exectuion_profile.trace"}.string();
+  cuda_graph_debug_path = std::filesystem::path{profile_path_prefix + "/" + name + "_cuda_graph.dot"}.string();
 }

 std::string TRTEngine::to_str() const {
```
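The new `_cuda_graph.dot` suffix suggests the captured CUDA graph gets dumped in Graphviz form alongside the other profiling artifacts. A hedged sketch of how such a path is typically consumed in libtorch — assuming `at::cuda::CUDAGraph::debug_dump` is the mechanism, since the hunk above only sets the path:

```cpp
#include <ATen/cuda/CUDAGraph.h>
#include <string>

// Writes the captured graph to a Graphviz .dot file (e.g. the
// cuda_graph_debug_path set above). Note: debug_dump() only works if
// enable_debug_mode() was called before capture, so ATen keeps the
// underlying cudaGraph_t alive after instantiation.
void dump_cuda_graph(at::cuda::CUDAGraph& graph, const std::string& dot_path) {
  graph.debug_dump(dot_path);
}
```

The resulting file can then be rendered with `dot -Tsvg` for inspection.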
`TRTEngine` header — the single `active_stream` member is split into `engine_stream` and `caller_stream`, and a `cuda_graph_debug_path` member joins the profiling paths:

```diff
@@ -70,7 +70,8 @@ struct TRTEngine : torch::CustomClassHolder {

   // CUDAGraph-Related Functionality
   at::cuda::CUDAGraph cudagraph = {};
-  at::cuda::CUDAStream active_stream = c10::cuda::getDefaultCUDAStream();
+  at::cuda::CUDAStream engine_stream = c10::cuda::getDefaultCUDAStream();
+  at::cuda::CUDAStream caller_stream = c10::cuda::getDefaultCUDAStream();
   std::vector<at::Tensor> input_buffers = {};
   std::vector<at::Tensor> output_buffers = {};
   std::string shape_key;
@@ -89,6 +90,7 @@ struct TRTEngine : torch::CustomClassHolder {
   std::string output_profile_path;
   std::string enqueue_profile_path;
   std::string trt_engine_profile_path;
+  std::string cuda_graph_debug_path;
   std::mutex mu;
   std::unique_ptr<TRTEngineProfiler> trt_engine_profiler;
 };
```
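Splitting `active_stream` into a `caller_stream`/`engine_stream` pair points at a two-stream execution model: the engine enqueues on a dedicated stream while staying ordered against whatever stream the caller was already using, via events rather than host-side blocking. A sketch of that handoff in stock libtorch — the function name and wiring are illustrative assumptions, not the Torch-TensorRT implementation:

```cpp
#include <ATen/cuda/CUDAEvent.h>
#include <c10/cuda/CUDAStream.h>

// Orders engine work after the caller's pending work, and the caller's
// subsequent work after the engine's, without blocking the CPU.
void enqueue_ordered(c10::cuda::CUDAStream caller_stream,
                     c10::cuda::CUDAStream engine_stream) {
  at::cuda::CUDAEvent inputs_ready;
  inputs_ready.record(caller_stream); // caller's prior work is the dependency
  inputs_ready.block(engine_stream);  // engine waits on-device, not on the host

  // ... enqueue TensorRT execution (or graph replay) on engine_stream ...

  at::cuda::CUDAEvent outputs_ready;
  outputs_ready.record(engine_stream);
  outputs_ready.block(caller_stream); // caller resumes only after outputs exist
}
```

Keeping the engine on its own stream also composes cleanly with CUDA graph capture, which cannot run on the default stream.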