chore: updates

peri044 · peri044 · commit 3addcae373f5 · 2024-04-13T01:07:35.000-07:00
diff --git a/.github/scripts/install-torch-tensorrt.sh b/.github/scripts/install-torch-tensorrt.sh
@@ -6,12 +6,9 @@ ${CONDA_RUN} ${PIP_INSTALL_TORCH} torchvision
 ${CONDA_RUN} python -m pip install pyyaml mpmath==1.3.0
 export TRT_VERSION=$(${CONDA_RUN} python -c "import versions; versions.tensorrt_version()")
 
-# Print PYTHON_VERSION
-printf "PYTHON_VERSION is equal to %s" ${PYTHON_VERSION//./}
-
 # Install TensorRT manually
-wget -P /opt/torch-tensorrt-builds/ https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.0/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz
-tar -xvzf /opt/torch-tensorrt-builds/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz -C /opt/torch-tensorrt-builds/
+wget -q -P /opt/torch-tensorrt-builds/ https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.0/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz
+tar -xzf /opt/torch-tensorrt-builds/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz -C /opt/torch-tensorrt-builds/
 python -m pip install /opt/torch-tensorrt-builds/TensorRT-10.0.0.6/python/tensorrt-10.0.0b6-cp${PYTHON_VERSION//./}-none-linux_x86_64.whl
 
 # Install Torch-TensorRT
diff --git a/core/runtime/execute_engine.cpp b/core/runtime/execute_engine.cpp
@@ -178,7 +178,7 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
       enqueue_profiler_guard =
           std::make_unique<torch::autograd::profiler::RecordProfile>(compiled_engine->enqueue_profile_path);
     }
-    c10::cuda::CUDAStream stream = c10::cuda::getStreamFromPool(/*isHighPriority=*/true, inputs[0].device().index());
+    c10::cuda::CUDAStream stream = c10::cuda::getCurrentCUDAStream(inputs[0].device().index());
     // nvinfer1::IExecutionContext::enqueue is not thread safe and we need a mutex for it.
     std::unique_lock<std::mutex> lock(compiled_engine->mu);
     compiled_engine->exec_ctx->enqueueV3(stream);
diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh
@@ -3,9 +3,9 @@
 # Install dependencies
 python3 -m pip install pyyaml
 yum install -y ninja-build gettext
-TRT_VERSION=10.0.0.6 #$(python3 -c "import versions; versions.tensorrt_version()")
-wget -P /opt/torch-tensorrt-builds/ https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.0/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz
-tar -xvzf /opt/torch-tensorrt-builds/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz -C /opt/torch-tensorrt-builds/
+TRT_VERSION=$(python3 -c "import versions; versions.tensorrt_version()")
+wget -q -P /opt/torch-tensorrt-builds/ https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.0/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz
+tar -xzf /opt/torch-tensorrt-builds/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz -C /opt/torch-tensorrt-builds/
 export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.0.6/lib:$LD_LIBRARY_PATH
 wget https://github.com/bazelbuild/bazelisk/releases/download/v1.17.0/bazelisk-linux-amd64 \
     && mv bazelisk-linux-amd64 /usr/bin/bazel \

Original file line number	Diff line number	Diff line change
`@@ -178,7 +178,7 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr`
`178`	`178`	`enqueue_profiler_guard =`
`179`	`179`	`std::make_unique<torch::autograd::profiler::RecordProfile>(compiled_engine->enqueue_profile_path);`
`180`	`180`	`}`
`181`		`- c10::cuda::CUDAStream stream = c10::cuda::getStreamFromPool(/*isHighPriority=*/true, inputs[0].device().index());`
	`181`	`+ c10::cuda::CUDAStream stream = c10::cuda::getCurrentCUDAStream(inputs[0].device().index());`
`182`	`182`	`// nvinfer1::IExecutionContext::enqueue is not thread safe and we need a mutex for it.`
`183`	`183`	`std::unique_lock<std::mutex> lock(compiled_engine->mu);`
`184`	`184`	`compiled_engine->exec_ctx->enqueueV3(stream);`