Skip to content

Commit 37ea7c2

Browse files
author
Naren Dasan
committed
fix: Fix the CUDAGraphs C++ runtime implementation
Signed-off-by: Naren Dasan <[email protected]> Signed-off-by: Naren Dasan <[email protected]>
1 parent 655ed6b commit 37ea7c2

File tree

15 files changed: +491 additions, -375 deletions

.github/workflows/build-test-linux.yml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ on:
88
- nightly
99
- release/*
1010
tags:
11-
# NOTE: Binary build pipelines should only get triggered on release candidate builds
12-
# Release candidate tags look like: v1.11.0-rc1
13-
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
11+
# NOTE: Binary build pipelines should only get triggered on release candidate builds
12+
# Release candidate tags look like: v1.11.0-rc1
13+
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
1414
workflow_dispatch:
1515

1616
jobs:
@@ -229,7 +229,9 @@ jobs:
229229
export USE_HOST_DEPS=1
230230
pushd .
231231
cd tests/py/dynamo
232-
python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml runtime/
232+
python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore test_cudagraphs_py.py --ignore test_cudagraphs_cpp.py runtime/
233+
python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results_cuda_graphs_cpp.xml runtime/test_cudagraphs_cpp.py
234+
python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results_cuda_graphs_py.xml runtime/test_cudagraphs_py.py
233235
python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/
234236
python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/
235237
popd

.github/workflows/build-test-windows.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ on:
88
- nightly
99
- release/*
1010
tags:
11-
# NOTE: Binary build pipelines should only get triggered on release candidate builds
12-
# Release candidate tags look like: v1.11.0-rc1
13-
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
11+
# NOTE: Binary build pipelines should only get triggered on release candidate builds
12+
# Release candidate tags look like: v1.11.0-rc1
13+
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
1414
workflow_dispatch:
1515

1616
jobs:
@@ -219,7 +219,7 @@ jobs:
219219
export USE_HOST_DEPS=1
220220
pushd .
221221
cd tests/py/dynamo
222-
python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml runtime/
222+
python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml runtime/
223223
python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/
224224
python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/
225225
popd

core/conversion/var/Var.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ bool Var::isITensorList() {
153153
// Unpack the Var as a List and check if each entry is a custom class since
154154
// ITensors are stored in CustomClassHolder
155155
auto ival_list = ptr_.ivalue->toList();
156-
for (int i = 0; i < ival_list.size(); i++) {
156+
for (size_t i = 0; i < ival_list.size(); i++) {
157157
if (!ival_list.get(i).isCustomClass()) {
158158
return false;
159159
}
@@ -167,7 +167,7 @@ std::vector<nvinfer1::ITensor*> Var::unwrapToITensorList() {
167167
TORCHTRT_CHECK(isITensorList(), "Expected IValue to be an ITensorList");
168168
auto ivalue_list = ptr_.ivalue->toList();
169169
std::vector<nvinfer1::ITensor*> outputs;
170-
for (int i = 0; i < ivalue_list.size(); i++) {
170+
for (size_t i = 0; i < ivalue_list.size(); i++) {
171171
auto element = ivalue_list.get(i).toCustomClass<TensorContainer>()->tensor();
172172
outputs.push_back(std::move(element));
173173
}

core/runtime/TRTEngine.cpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,6 @@ TRTEngine::TRTEngine(
7171
multi_gpu_device_check();
7272
set_rt_device(device_info);
7373

74-
// Set active stream to non-default stream
75-
auto current_stream = c10::cuda::getCurrentCUDAStream(device_info.id);
76-
if (current_stream == c10::cuda::getDefaultCUDAStream(device_info.id)) {
77-
active_stream = c10::cuda::getStreamFromPool(false, device_info.id);
78-
c10::cuda::setCurrentCUDAStream(active_stream);
79-
} else {
80-
active_stream = current_stream;
81-
}
82-
8374
rt = make_trt(nvinfer1::createInferRuntime(util::logging::get_logger()));
8475

8576
name = slugify(mod_name);
@@ -253,6 +244,7 @@ void TRTEngine::set_profiling_paths() {
253244
enqueue_profile_path = std::filesystem::path{profile_path_prefix + "/" + name + "_enqueue_profile.trace"}.string();
254245
trt_engine_profile_path =
255246
std::filesystem::path{profile_path_prefix + "/" + name + "_engine_exectuion_profile.trace"}.string();
247+
cuda_graph_debug_path = std::filesystem::path{profile_path_prefix + "/" + name + "_cuda_graph.dot"}.string();
256248
}
257249

258250
std::string TRTEngine::to_str() const {

core/runtime/TRTEngine.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ struct TRTEngine : torch::CustomClassHolder {
7070

7171
// CUDAGraph-Related Functionality
7272
at::cuda::CUDAGraph cudagraph = {};
73-
at::cuda::CUDAStream active_stream = c10::cuda::getDefaultCUDAStream();
73+
at::cuda::CUDAStream engine_stream = c10::cuda::getDefaultCUDAStream();
74+
at::cuda::CUDAStream caller_stream = c10::cuda::getDefaultCUDAStream();
7475
std::vector<at::Tensor> input_buffers = {};
7576
std::vector<at::Tensor> output_buffers = {};
7677
std::string shape_key;
@@ -89,6 +90,7 @@ struct TRTEngine : torch::CustomClassHolder {
8990
std::string output_profile_path;
9091
std::string enqueue_profile_path;
9192
std::string trt_engine_profile_path;
93+
std::string cuda_graph_debug_path;
9294
std::mutex mu;
9395
std::unique_ptr<TRTEngineProfiler> trt_engine_profiler;
9496
};

0 commit comments

Comments
 (0)