Skip to content

Commit a38bee7

Browse files
Add tensorrt test workflow (#3266)
1 parent cc0d8af commit a38bee7

File tree

6 files changed

+800
-2
lines changed

6 files changed

+800
-2
lines changed
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import copy
5+
import json
6+
import sys
7+
8+
# CUDA versions to test against the future TensorRT versions, keyed by channel.
# Update the lists here to change which CUDA builds are kept in the matrix.
#   nightly: the future-TensorRT test workflow was triggered from the main
#            branch or from a personal branch
#   test:    the workflow was triggered from a release branch (release/2.5 etc.)
CUDA_VERSIONS_DICT = {
    "nightly": ["cu124"],
    "test": ["cu121", "cu124"],
    "release": ["cu121", "cu124"],
}

# Python versions to test against the future TensorRT versions, keyed by channel.
# Update the lists here to change which Python builds are kept in the matrix.
#   nightly: the future-TensorRT test workflow was triggered from the main
#            branch or from a personal branch
#   test:    the workflow was triggered from a release branch (release/2.5 etc.)
PYTHON_VERSIONS_DICT = {
    "nightly": ["3.9"],
    "test": ["3.9", "3.10", "3.11", "3.12"],
    "release": ["3.9", "3.10", "3.11", "3.12"],
}

# Future TensorRT versions to test, keyed first by platform ("windows" or
# "linux") and then by TensorRT version. Each entry carries the download URL
# ("urls"), the archive's top-level directory ("strip_prefix") and the archive
# checksum ("sha256"). Add or remove versions here.
TENSORRT_VERSIONS_DICT = {
    "windows": {
        "10.4.0": {
            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/zip/TensorRT-10.4.0.26.Windows.win10.cuda-12.6.zip",
            "strip_prefix": "TensorRT-10.4.0.26",
            "sha256": "3a7de83778b9e9f812fd8901e07e0d7d6fc54ce633fcff2e340f994df2c6356c",
        },
        "10.5.0": {
            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/zip/TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip",
            "strip_prefix": "TensorRT-10.5.0.18",
            "sha256": "e6436f4164db4e44d727354dccf7d93755efb70d6fbfd6fa95bdfeb2e7331b24",
        },
        "10.6.0": {
            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/zip/TensorRT-10.6.0.26.Windows.win10.cuda-12.6.zip",
            "strip_prefix": "TensorRT-10.6.0.26",
            "sha256": "6c6d92c108a1b3368423e8f69f08d31269830f1e4c9da43b37ba34a176797254",
        },
    },
    "linux": {
        "10.4.0": {
            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/tars/TensorRT-10.4.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz",
            "strip_prefix": "TensorRT-10.4.0.26",
            "sha256": "cb0273ecb3ba4db8993a408eedd354712301a6c7f20704c52cdf9f78aa97bbdb",
        },
        "10.5.0": {
            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz",
            "strip_prefix": "TensorRT-10.5.0.18",
            "sha256": "f404d379d639552a3e026cd5267213bd6df18a4eb899d6e47815bbdb34854958",
        },
        "10.6.0": {
            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/tars/TensorRT-10.6.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz",
            "strip_prefix": "TensorRT-10.6.0.26",
            "sha256": "33d3c2f3f4c84dc7991a4337a6fde9ed33f5c8e5c4f03ac2eb6b994a382b03a0",
        },
    },
}
63+
64+
65+
def main(args: list[str]) -> None:
    """Filter a build matrix and expand it across the TensorRT versions under test.

    The ``--matrix`` argument is a JSON string holding an ``"include"`` list.
    The channel and platform are taken from the first entry. Entries whose
    ``desired_cuda`` / ``python_version`` are not enabled for that channel are
    dropped, and every remaining entry is emitted once per TensorRT version
    configured for the platform, with that version's download details attached
    under the ``"tensorrt"`` key. The resulting matrix is printed to stdout as
    JSON.

    Args:
        args: Command-line arguments, without the program name.

    Raises:
        Exception: If ``--matrix`` is empty, the include list is empty, the
            ``channel`` or ``validation_runner`` field is missing from the
            first entry, or either holds an unsupported value.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--matrix",
        help="matrix",
        type=str,
        default="",
    )

    options = parser.parse_args(args)
    if options.matrix == "":
        raise Exception("--matrix is empty, please provide the matrix json str")

    matrix_dict = json.loads(options.matrix)
    includes = matrix_dict["include"]
    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if not includes:
        raise Exception(f"include list is empty in the matrix: {options.matrix}")

    # Channel and platform are read from the first entry only.
    first = includes[0]

    if "channel" not in first:
        raise Exception(f"channel field is missing from the matrix: {options.matrix}")
    channel = first["channel"]
    if channel not in ("nightly", "test", "release"):
        raise Exception(
            f"channel field: {channel} is not supported, currently supported value: nightly, test, release"
        )

    if "validation_runner" not in first:
        raise Exception(
            f"validation_runner field is missing from the matrix: {options.matrix}"
        )
    validation_runner = first["validation_runner"]
    if "windows" in validation_runner:
        arch = "windows"
    elif "linux" in validation_runner:
        arch = "linux"
    else:
        # The entry is a dict, so the runner name must be read by key; attribute
        # access here would raise AttributeError and hide this message.
        raise Exception(
            f"{validation_runner} is not the supported arch, currently only support windows and linux"
        )

    cuda_versions = CUDA_VERSIONS_DICT[channel]
    python_versions = PYTHON_VERSIONS_DICT[channel]
    tensorrt_versions = TENSORRT_VERSIONS_DICT[arch]

    filtered_includes = []
    for item in includes:
        if (
            item["desired_cuda"] not in cuda_versions
            or item["python_version"] not in python_versions
        ):
            continue
        for tensorrt_version, tensorrt_json in tensorrt_versions.items():
            new_item = copy.deepcopy(item)
            # Build a fresh dict per output entry so the module-level
            # TENSORRT_VERSIONS_DICT is never mutated or shared between items.
            new_item["tensorrt"] = {**tensorrt_json, "version": tensorrt_version}
            filtered_includes.append(new_item)

    print(json.dumps({"include": filtered_includes}))


if __name__ == "__main__":
    main(sys.argv[1:])
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
name: Build Torch-TensorRT wheel on Linux with Future TensorRT Versions

# Reusable workflow: it only runs when invoked by another workflow through
# `workflow_call`, which supplies the inputs declared below.
on:
  workflow_call:
    inputs:
      repository:
        description: 'Repository to checkout, defaults to ""'
        default: ""
        type: string
      ref:
        description: 'Reference to checkout, defaults to "nightly"'
        default: "nightly"
        type: string
      test-infra-repository:
        description: "Test infra repository to use"
        default: "pytorch/test-infra"
        type: string
      test-infra-ref:
        description: "Test infra reference to use"
        default: ""
        type: string
      build-matrix:
        # JSON string; the build job parses it with fromJSON().
        description: "Build matrix to utilize"
        default: ""
        type: string
      pre-script:
        description: "Pre script to run prior to build"
        default: ""
        type: string
      post-script:
        description: "Post script to run after build"
        default: ""
        type: string
      smoke-test-script:
        description: "Script for Smoke Test for a specific domain"
        default: ""
        type: string
      env-var-script:
        description: "Script that sets Domain-Specific Environment Variables"
        default: ""
        type: string
      package-name:
        description: "Name of the actual python package that is imported"
        default: ""
        type: string
      trigger-event:
        description: "Trigger Event in caller that determines whether or not to upload"
        default: ""
        type: string
      cache-path:
        description: "The path(s) on the runner to cache or restore. The path is relative to repository."
        default: ""
        type: string
      cache-key:
        description: "The key created when saving a cache and the key used to search for a cache."
        default: ""
        type: string
      architecture:
        description: Architecture to build for x86_64 for default Linux, or aarch64 for Linux aarch64 builds
        required: false
        type: string
        default: x86_64
      submodules:
        description: Works as stated in actions/checkout, but the default value is recursive
        required: false
        type: string
        default: recursive
      setup-miniconda:
        description: Set to true if setup-miniconda is needed
        required: false
        type: boolean
        default: true

permissions:
  id-token: write
  contents: read
77+
78+
jobs:
  # Builds one wheel per entry of the caller-supplied build matrix.
  build:
    strategy:
      fail-fast: false
      matrix: ${{ fromJSON(inputs.build-matrix) }}
    env:
      PYTHON_VERSION: ${{ matrix.python_version }}
      PACKAGE_TYPE: wheel
      REPOSITORY: ${{ inputs.repository }}
      REF: ${{ inputs.ref }}
      CU_VERSION: ${{ matrix.desired_cuda }}
      UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }}
      ARCH: ${{ inputs.architecture }}
      # TensorRT download details come from the `tensorrt` object of each matrix entry.
      TENSORRT_STRIP_PREFIX: ${{ matrix.tensorrt.strip_prefix }}
      TENSORRT_VERSION: ${{ matrix.tensorrt.version }}
      TENSORRT_URLS: ${{ matrix.tensorrt.urls }}
      TENSORRT_SHA256: ${{ matrix.tensorrt.sha256 }}
      UPLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_${{ inputs.architecture }}
    name: build_tensorrt${{ matrix.tensorrt.version }}_py${{matrix.python_version}}_${{matrix.desired_cuda}}
    runs-on: ${{ matrix.validation_runner }}
    container:
      image: ${{ matrix.container_image }}
      options: ${{ matrix.gpu_arch_type == 'cuda' && '--gpus all' || ' ' }}
    # If a build is taking longer than 120 minutes on these runners we need
    # to have a conversation
    timeout-minutes: 120

    steps:
      - name: Clean workspace
        shell: bash -l {0}
        run: |
          set -x
          echo "::group::Cleanup debug output"
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir -p "${GITHUB_WORKSPACE}"
          if [[ "${{ inputs.architecture }}" = "aarch64" ]]; then
            # The glob must stay outside the quotes or it is never expanded;
            # `:?` aborts instead of expanding to `/*` if RUNNER_TEMP is unset.
            rm -rf "${RUNNER_TEMP:?}"/*
          fi
          echo "::endgroup::"
      - uses: actions/checkout@v3
        with:
          # Support the use case where we need to checkout someone's fork
          repository: ${{ inputs.test-infra-repository }}
          ref: ${{ inputs.test-infra-ref }}
          path: test-infra
      - uses: actions/checkout@v3
        if: ${{ inputs.architecture == 'aarch64' }}
        with:
          # Provides builder/aarch64_linux/aarch64_ci_setup.sh used by the next step
          repository: "pytorch/builder"
          ref: "main"
          path: builder
      - name: Set linux aarch64 CI
        if: ${{ inputs.architecture == 'aarch64' }}
        shell: bash -l {0}
        env:
          DESIRED_PYTHON: ${{ matrix.python_version }}
        run: |
          set +e
          # TODO: This is temporary aarch64 setup script, this should be integrated into aarch64 docker.
          ${GITHUB_WORKSPACE}/builder/aarch64_linux/aarch64_ci_setup.sh
          echo "/opt/conda/bin" >> $GITHUB_PATH
          set -e
      - uses: ./test-infra/.github/actions/set-channel
      - name: Set PYTORCH_VERSION
        if: ${{ env.CHANNEL == 'test' }}
        run: |
          # When building RC, set the version to be the current candidate version,
          # otherwise, leave it alone so nightly will pick up the latest
          echo "PYTORCH_VERSION=${{ matrix.stable_version }}" >> "${GITHUB_ENV}"
      - uses: ./test-infra/.github/actions/setup-binary-builds
        env:
          PLATFORM: ${{ inputs.architecture == 'aarch64' && 'linux-aarch64' || ''}}
        with:
          repository: ${{ inputs.repository }}
          ref: ${{ inputs.ref }}
          submodules: ${{ inputs.submodules }}
          setup-miniconda: ${{ inputs.setup-miniconda }}
          python-version: ${{ env.PYTHON_VERSION }}
          cuda-version: ${{ env.CU_VERSION }}
          arch: ${{ env.ARCH }}
      - name: Combine Env Var and Build Env Files
        if: ${{ inputs.env-var-script != '' }}
        working-directory: ${{ inputs.repository }}
        shell: bash -l {0}
        run: |
          cat "${{ inputs.env-var-script }}" >> "${BUILD_ENV_FILE}"
      - name: Install torch dependency
        shell: bash -l {0}
        run: |
          set -x
          # shellcheck disable=SC1090
          source "${BUILD_ENV_FILE}"
          # shellcheck disable=SC2086
          ${CONDA_RUN} ${PIP_INSTALL_TORCH}
      - name: Run Pre-Script with Caching
        if: ${{ inputs.pre-script != '' }}
        uses: ./test-infra/.github/actions/run-script-with-cache
        with:
          cache-path: ${{ inputs.cache-path }}
          cache-key: ${{ inputs.cache-key }}
          repository: ${{ inputs.repository }}
          script: ${{ inputs.pre-script }}
      - name: Build clean
        working-directory: ${{ inputs.repository }}
        shell: bash -l {0}
        run: |
          set -x
          source "${BUILD_ENV_FILE}"
          ${CONDA_RUN} python setup.py clean
      - name: Build the wheel (bdist_wheel)
        working-directory: ${{ inputs.repository }}
        shell: bash -l {0}
        run: |
          set -x
          source "${BUILD_ENV_FILE}"
          ${CONDA_RUN} python setup.py bdist_wheel

      - name: Run Post-Script
        if: ${{ inputs.post-script != '' }}
        uses: ./test-infra/.github/actions/run-script-with-cache
        with:
          repository: ${{ inputs.repository }}
          script: ${{ inputs.post-script }}
      # No smoke test is executed yet: this step only sources the build env file.
      - name: Smoke Test
        shell: bash -l {0}
        env:
          PACKAGE_NAME: ${{ inputs.package-name }}
          SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }}
        run: |
          set -x
          source "${BUILD_ENV_FILE}"
          # TODO: add smoke test for the auditwheel tarball built

      # NB: Only upload to GitHub after passing smoke tests
      - name: Upload wheel to GitHub
        continue-on-error: true
        uses: actions/upload-artifact@v3
        with:
          name: ${{ env.UPLOAD_ARTIFACT_NAME }}
          path: ${{ inputs.repository }}/dist
219+
220+
# Runs sharing the same workflow, PR number (or ref name), repository and
# dispatch flag form one group; a newer run cancels the one in progress.
# `inputs.job-name` was dropped from the group because this workflow declares
# no such input.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true

0 commit comments

Comments
 (0)