
Commit 93b5b8f

feat: Support weight-stripped engine and REFIT_IDENTICAL flag (#3167)
1 parent ba41cbb commit 93b5b8f

31 files changed: +1517 −450 lines
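
Throughout the example scripts touched by this commit, the `make_refittable=True` compilation option is replaced by `immutable_weights=False`. Below is a minimal sketch of the renamed option in a `torch_tensorrt.compile` call; the model, input shape, and precision are illustrative assumptions, not part of the diff.

# Sketch only: the make_refittable flag shown in the old code is replaced by
# immutable_weights. Everything except immutable_weights is an illustrative choice.
import torch
import torch_tensorrt
import torchvision.models as models

model = models.resnet18(pretrained=True).eval().to("cuda")
inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]

trt_gm = torch_tensorrt.compile(
    model,
    ir="dynamo",
    inputs=inputs,
    enabled_precisions={torch.float32},
    immutable_weights=False,  # previously: make_refittable=True
)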

.github/scripts/generate_binary_build_matrix.py

Lines changed: 5 additions & 5 deletions
@@ -152,33 +152,33 @@ def initialize_globals(channel: str, build_python_only: bool) -> None:
         "12.4": "pytorch/manylinux2_28-builder:cuda12.4",
         "12.6": "pytorch/manylinux2_28-builder:cuda12.6",
         **{
-            gpu_arch: f"pytorch/manylinux-builder:rocm{gpu_arch}"
+            gpu_arch: f"pytorch/manylinux2_28-builder:rocm{gpu_arch}"
             for gpu_arch in ROCM_ARCHES
         },
-        CPU: "pytorch/manylinux-builder:cpu",
+        CPU: "pytorch/manylinux2_28-builder:cpu",
         XPU: "pytorch/manylinux2_28-builder:xpu",
         # TODO: Migrate CUDA_AARCH64 image to manylinux2_28_aarch64-builder:cuda12.4
         CPU_AARCH64: "pytorch/manylinux2_28_aarch64-builder:cpu-aarch64",
         CUDA_AARCH64: "pytorch/manylinuxaarch64-builder:cuda12.4",
     }
     LIBTORCH_CONTAINER_IMAGES = {
         **{
-            (gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux-builder:cuda{gpu_arch}"
+            (gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux2_28-builder:cuda{gpu_arch}"
             for gpu_arch in CUDA_ARCHES
         },
         **{
             (gpu_arch, CXX11_ABI): f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}"
             for gpu_arch in CUDA_ARCHES
         },
         **{
-            (gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux-builder:rocm{gpu_arch}"
+            (gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux2_28-builder:rocm{gpu_arch}"
             for gpu_arch in ROCM_ARCHES
         },
         **{
             (gpu_arch, CXX11_ABI): f"pytorch/libtorch-cxx11-builder:rocm{gpu_arch}"
             for gpu_arch in ROCM_ARCHES
         },
-        (CPU, PRE_CXX11_ABI): "pytorch/manylinux-builder:cpu",
+        (CPU, PRE_CXX11_ABI): "pytorch/manylinux2_28-builder:cpu",
         (CPU, CXX11_ABI): "pytorch/libtorch-cxx11-builder:cpu",
     }

.github/workflows/build-test-linux.yml

Lines changed: 1 addition & 1 deletion
@@ -137,7 +137,7 @@ jobs:
         export CI_BUILD=1
         pushd .
         cd tests/py/dynamo
-        python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/
+        python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/
         popd

   tests-py-dynamo-fe:

.github/workflows/build-test-tensorrt-linux.yml

Lines changed: 2 additions & 2 deletions
@@ -129,7 +129,7 @@ jobs:
         export CI_BUILD=1
         pushd .
         cd tests/py/dynamo
-        python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/
+        python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/
         popd

   tests-py-dynamo-fe:
@@ -314,4 +314,4 @@ jobs:

 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}
-  cancel-in-progress: true
+  cancel-in-progress: true

.github/workflows/build-test-tensorrt-windows.yml

Lines changed: 2 additions & 2 deletions
@@ -132,7 +132,7 @@ jobs:
         export CI_BUILD=1
         pushd .
         cd tests/py/dynamo
-        python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/
+        python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/
         popd

   tests-py-dynamo-fe:
@@ -298,4 +298,4 @@ jobs:

 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}
-  cancel-in-progress: true
+  cancel-in-progress: true

.github/workflows/build-test-windows.yml

Lines changed: 1 addition & 1 deletion
@@ -119,7 +119,7 @@ jobs:
         export CI_BUILD=1
         pushd .
         cd tests/py/dynamo
-        python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/
+        python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/
         popd

   tests-py-dynamo-fe:

examples/dynamo/engine_caching_bert_example.py

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@ def compile_bert(iterations=3):
         "truncate_double": True,
         "debug": False,
         "min_block_size": 1,
-        "make_refittable": True,
+        "immutable_weights": False,
         "cache_built_engines": cache_built_engines,
         "reuse_cached_engines": reuse_cached_engines,
         "engine_cache_dir": "/tmp/torch_trt_bert_engine_cache",

examples/dynamo/engine_caching_example.py

Lines changed: 4 additions & 4 deletions
@@ -63,7 +63,7 @@ def remove_timing_cache(path=TIMING_CACHE_PATH):
 # in a subsequent compilation, either as part of this session or a new session, the cache will
 # pull the built engine and **refit** the weights which can reduce compilation times by orders of magnitude.
 # As such, in order to insert a new engine into the cache (i.e. ``cache_built_engines=True``),
-# the engine must be refittable (``make_refittable=True``). See :ref:`refit_engine_example` for more details.
+# the engine must be refittable (``immutable_weights=False``). See :ref:`refit_engine_example` for more details.


 def torch_compile(iterations=3):
@@ -97,7 +97,7 @@ def torch_compile(iterations=3):
             "enabled_precisions": enabled_precisions,
             "debug": debug,
             "min_block_size": min_block_size,
-            "make_refittable": True,
+            "immutable_weights": False,
             "cache_built_engines": cache_built_engines,
             "reuse_cached_engines": reuse_cached_engines,
         },
@@ -157,7 +157,7 @@ def dynamo_compile(iterations=3):
             enabled_precisions=enabled_precisions,
             debug=debug,
             min_block_size=min_block_size,
-            make_refittable=True,
+            immutable_weights=False,
             cache_built_engines=cache_built_engines,
             reuse_cached_engines=reuse_cached_engines,
             engine_cache_size=1 << 30,  # 1GB
@@ -268,7 +268,7 @@ def torch_compile_my_cache(iterations=3):
             "enabled_precisions": enabled_precisions,
             "debug": debug,
             "min_block_size": min_block_size,
-            "make_refittable": True,
+            "immutable_weights": False,
             "cache_built_engines": cache_built_engines,
             "reuse_cached_engines": reuse_cached_engines,
             "custom_engine_cache": engine_cache,

examples/dynamo/mutable_torchtrt_module_example.py

Lines changed: 2 additions & 2 deletions
@@ -31,7 +31,7 @@
 settings = {
     "use_python": False,
     "enabled_precisions": {torch.float32},
-    "make_refittable": True,
+    "immutable_weights": False,
 }

 model = models.resnet18(pretrained=True).eval().to("cuda")
@@ -80,7 +80,7 @@
     "use_python_runtime": True,
     "enabled_precisions": {torch.float16},
     "debug": True,
-    "make_refittable": True,
+    "immutable_weights": False,
 }

 model_id = "runwayml/stable-diffusion-v1-5"

examples/dynamo/refit_engine_example.py

Lines changed: 2 additions & 2 deletions
@@ -47,7 +47,7 @@
 # ---------------------------------------
 #
 # The inital step is to compile a module and save it as with a normal. Note that there is an
-# additional parameter `make_refittable` that is set to `True`. This parameter is used to
+# additional parameter `immutable_weights` that is set to `False`. This parameter is used to
 # indicate that the engine being built should support weight refitting later. Engines built without
 # these setttings will not be able to be refit.
 #
@@ -69,7 +69,7 @@
     debug=debug,
     min_block_size=min_block_size,
     torch_executed_ops=torch_executed_ops,
-    make_refittable=True,
+    immutable_weights=False,
     reuse_cached_engines=False,
 )  # Output is a torch.fx.GraphModule
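
A hedged sketch of the full workflow the comments above describe: compile once with `immutable_weights=False`, then push weights from a second module into the already built engine. The `refit_module_weights` helper, the models, and the input shape are assumptions drawn from this example file, not from the hunks shown here.

# Sketch only: build a refittable engine, then refit it with new weights.
import torch
import torch_tensorrt
import torchvision.models as models

inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]

# Compile a module whose engine supports later weight refitting
model = models.resnet18(pretrained=False).eval().to("cuda")
exp_program = torch.export.export(model, tuple(inputs))
trt_gm = torch_tensorrt.dynamo.compile(
    exp_program,
    inputs=inputs,
    immutable_weights=False,    # the engine must be refittable
    reuse_cached_engines=False,
)

# Refit the compiled module with weights from a second (e.g. pretrained) model
model2 = models.resnet18(pretrained=True).eval().to("cuda")
exp_program2 = torch.export.export(model2, tuple(inputs))
new_trt_gm = torch_tensorrt.dynamo.refit_module_weights(
    compiled_module=trt_gm,
    new_weight_module=exp_program2,
    arg_inputs=inputs,
)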

py/ci/Dockerfile.ci

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-FROM pytorch/manylinux-builder:cuda12.4
+FROM pytorch/manylinux2_28-builder:cuda12.6

 RUN yum install -y ninja-build
py/torch_tensorrt/_enums.py

Lines changed: 2 additions & 2 deletions
@@ -220,7 +220,7 @@ def _from(
             return dtype.f32
         elif t == np.float64:
             return dtype.f64
-        elif t == np.bool:
+        elif t == np.bool_:
             return dtype.b
         # TODO: Consider using ml_dtypes when issues like this are resolved:
         # https://github.com/pytorch/pytorch/issues/109873
@@ -1384,7 +1384,7 @@ def current_platform(cls) -> Platform:
     def __str__(self) -> str:
         return str(self.name)

-    @needs_torch_tensorrt_runtime
+    @needs_torch_tensorrt_runtime  # type: ignore
     def _to_serialized_rt_platform(self) -> str:
         val: str = torch.ops.tensorrt._platform_unknown()