Skip to content

chore: update docker, refactor CI TRT dep to main #2793

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions .github/scripts/install-torch-tensorrt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,7 @@ ${CONDA_RUN} ${PIP_INSTALL_TORCH} torchvision
${CONDA_RUN} python -m pip install pyyaml mpmath==1.3.0
export TRT_VERSION=$(${CONDA_RUN} python -c "import versions; versions.tensorrt_version()")

# Install TensorRT manually
wget -q -P /opt/torch-tensorrt-builds/ https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz
tar -xzf /opt/torch-tensorrt-builds/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz -C /opt/torch-tensorrt-builds/
python -m pip install /opt/torch-tensorrt-builds/TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp${PYTHON_VERSION//./}-none-linux_x86_64.whl

# Install Torch-TensorRT
${CONDA_RUN} python -m pip install /opt/torch-tensorrt-builds/torch_tensorrt*+${CU_VERSION}*.whl
${CONDA_RUN} python -m pip install /opt/torch-tensorrt-builds/torch_tensorrt*+${CU_VERSION}*.whl tensorrt~=${TRT_VERSION} --extra-index-url=https://pypi.ngc.nvidia.com

echo -e "Running test script";
11 changes: 1 addition & 10 deletions .github/workflows/build-test-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,16 +77,13 @@ jobs:
script: |
export USE_HOST_DEPS=1
export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH
pushd .
cd tests/modules
# Don't use requirements.txt here as it contains tensorrt and torch which should have been installed by now.
${CONDA_RUN} python -m pip install numpy packaging pyyaml transformers timm pybind11==2.6.2
${CONDA_RUN} python -m pip install --pre -r ../py/requirements.txt
${CONDA_RUN} python hub.py
popd
pushd .
cd tests/py/ts
${CONDA_RUN} python -m pip install --pre -r ../requirements.txt --use-deprecated=legacy-resolver
${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/
${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/
${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/
Expand Down Expand Up @@ -114,7 +111,6 @@ jobs:
pre-script: ${{ matrix.pre-script }}
script: |
export USE_HOST_DEPS=1
export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH
pushd .
cd tests/py/dynamo
${CONDA_RUN} python -m pip install --pre -r ../requirements.txt --use-deprecated=legacy-resolver
Expand Down Expand Up @@ -143,7 +139,6 @@ jobs:
pre-script: ${{ matrix.pre-script }}
script: |
export USE_HOST_DEPS=1
export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH
pushd .
cd tests/py/dynamo
${CONDA_RUN} python -m pip install --pre -r ../requirements.txt --use-deprecated=legacy-resolver
Expand Down Expand Up @@ -173,7 +168,6 @@ jobs:
pre-script: ${{ matrix.pre-script }}
script: |
export USE_HOST_DEPS=1
export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH
pushd .
cd tests/py/dynamo
${CONDA_RUN} python -m pip install --pre -r ../requirements.txt --use-deprecated=legacy-resolver
Expand Down Expand Up @@ -202,7 +196,6 @@ jobs:
pre-script: ${{ matrix.pre-script }}
script: |
export USE_HOST_DEPS=1
export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH
pushd .
cd tests/py/dynamo
${CONDA_RUN} python -m pip install --pre -r ../requirements.txt --use-deprecated=legacy-resolver
Expand Down Expand Up @@ -233,7 +226,6 @@ jobs:
pre-script: ${{ matrix.pre-script }}
script: |
export USE_HOST_DEPS=1
export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH
pushd .
cd tests/py/dynamo
${CONDA_RUN} python -m pip install --pre -r ../requirements.txt --use-deprecated=legacy-resolver
Expand Down Expand Up @@ -263,7 +255,6 @@ jobs:
pre-script: ${{ matrix.pre-script }}
script: |
export USE_HOST_DEPS=1
export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH
pushd .
cd tests/py/core
${CONDA_RUN} python -m pip install --pre -r ../requirements.txt --use-deprecated=legacy-resolver
Expand Down
9 changes: 1 addition & 8 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,10 @@ RUN pyenv install -v ${PYTHON_VERSION}
RUN pyenv global ${PYTHON_VERSION}

# Install TensorRT + dependencies
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin
RUN mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/7fa2af80.pub
RUN add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /"
RUN apt-get update

RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub
RUN add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /"
RUN apt-get update

RUN apt-get install -y libnvinfer8=${TENSORRT_VERSION}.* libnvinfer-plugin8=${TENSORRT_VERSION}.* libnvinfer-dev=${TENSORRT_VERSION}.* libnvinfer-plugin-dev=${TENSORRT_VERSION}.* libnvonnxparsers8=${TENSORRT_VERSION}.* libnvonnxparsers-dev=${TENSORRT_VERSION}.* libnvparsers8=${TENSORRT_VERSION}.* libnvparsers-dev=${TENSORRT_VERSION}.* libnvinfer-headers-dev=${TENSORRT_VERSION}.* libnvinfer-headers-plugin-dev=${TENSORRT_VERSION}.*
RUN TENSORRT_MAJOR_VERSION=`echo ${TENSORRT_VERSION} | cut -d '.' -f 1` && apt-get install -y libnvinfer${TENSORRT_MAJOR_VERSION}=${TENSORRT_VERSION}.* libnvinfer-plugin${TENSORRT_MAJOR_VERSION}=${TENSORRT_VERSION}.* libnvinfer-dev=${TENSORRT_VERSION}.* libnvinfer-plugin-dev=${TENSORRT_VERSION}.* libnvonnxparsers${TENSORRT_MAJOR_VERSION}=${TENSORRT_VERSION}.* libnvonnxparsers-dev=${TENSORRT_VERSION}.*

# Setup Bazel via Bazelisk
RUN wget -q https://github.com/bazelbuild/bazelisk/releases/download/v1.17.0/bazelisk-linux-amd64 -O /usr/bin/bazel &&\
Expand Down
6 changes: 3 additions & 3 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Use `Dockerfile` to build a container which provides the exact development environment that our master branch is usually tested against.

* The `Dockerfile` currently uses <a href="https://github.com/bazelbuild/bazelisk">Bazelisk</a> to select the Bazel version, and uses the exact library versions of Torch and CUDA listed in <a href="https://github.com/pytorch/TensorRT#dependencies">dependencies</a>.
* The desired version of TensorRT must be specified as build-args, with major and minor versions as in: `--build-arg TENSORRT_VERSION=a.b`
* The desired versions of TensorRT must be specified as build-args, with major and minor versions as in: `--build-arg TENSORRT_VERSION=a.b`
* [**Optional**] The desired base image can be changed by explicitly setting a base image, as in `--build-arg BASE_IMG=nvidia/cuda:11.8.0-devel-ubuntu22.04`, though this is optional
* [**Optional**] Additionally, the desired Python version can be changed by explicitly setting a version, as in `--build-arg PYTHON_VERSION=3.10`, though this is optional as well.

Expand All @@ -17,14 +17,14 @@ Note: By default the container uses the `pre-cxx11-abi` version of Torch + Torch

### Instructions

- The example below uses TensorRT 8.6
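- The example below uses TensorRT 10.0.1.6 (passed as `TENSORRT_VERSION=10.0.1`; the Dockerfile appends `.*` when pinning the apt packages, so the full release is matched)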
- See <a href="https://github.com/pytorch/TensorRT#dependencies">dependencies</a> for a list of current default dependencies.

> From root of Torch-TensorRT repo

Build:
```
DOCKER_BUILDKIT=1 docker build --build-arg TENSORRT_VERSION=8.6 -f docker/Dockerfile -t torch_tensorrt:latest .
DOCKER_BUILDKIT=1 docker build --build-arg TENSORRT_VERSION=10.0.1 -f docker/Dockerfile -t torch_tensorrt:latest .
```

Run:
Expand Down
3 changes: 0 additions & 3 deletions packaging/pre_build_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
python3 -m pip install pyyaml
yum install -y ninja-build gettext
TRT_VERSION=$(python3 -c "import versions; versions.tensorrt_version()")
wget -q -P /opt/torch-tensorrt-builds/ https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz
tar -xzf /opt/torch-tensorrt-builds/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz -C /opt/torch-tensorrt-builds/
export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH
wget https://github.com/bazelbuild/bazelisk/releases/download/v1.17.0/bazelisk-linux-amd64 \
&& mv bazelisk-linux-amd64 /usr/bin/bazel \
&& chmod +x /usr/bin/bazel
Expand Down
6 changes: 4 additions & 2 deletions tests/py/dynamo/lowering/test_aten_lowering_passes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
import unittest

import torch
from torch.testing._internal.common_utils import TestCase, run_tests

import torch_tensorrt
from torch.testing._internal.common_utils import TestCase, run_tests

from ..testing_utilities import DECIMALS_OF_AGREEMENT, lower_graph_testing

Expand Down Expand Up @@ -397,6 +396,9 @@ def forward(self, q, k, v):


class TestLowerLinear(TestCase):
@unittest.skip(
"This test has threshold failures. This is tracked at https://github.com/pytorch/TensorRT/issues/2715",
)
def test_lower_linear(self):
class Linear(torch.nn.Module):
def forward(self, input, weight, bias):
Expand Down
97 changes: 0 additions & 97 deletions tests/py/dynamo/runtime/test_hw_compat.py

This file was deleted.

7 changes: 5 additions & 2 deletions toolchains/ci_workspaces/WORKSPACE.x86_64.release.rhel.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,12 @@ http_archive(

http_archive(
name = "tensorrt",
urls = ["file:////opt/torch-tensorrt-builds/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz",],
build_file = "@//third_party/tensorrt/archive:BUILD",
strip_prefix = "TensorRT-10.0.1.6"
sha256 = "a5cd2863793d69187ce4c73b2fffc1f470ff28cfd91e3640017e53b8916453d5",
strip_prefix = "TensorRT-10.0.1.6",
urls = [
"https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz",
],
)

# #########################################################################
Expand Down
Loading