
chore: test bf16 fixes in CI #3491


Merged
merged 14 commits on Apr 29, 2025
2 changes: 2 additions & 0 deletions .github/workflows/build-test-linux.yml
@@ -138,6 +138,7 @@ jobs:
pushd .
cd tests/py
python -m pip install -r requirements.txt
python -m pip install nvidia-modelopt[all] --extra-index-url https://pypi.nvidia.com
cd dynamo
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin.py
@@ -172,6 +173,7 @@ jobs:
pushd .
cd tests/py
python -m pip install -r requirements.txt
python -m pip install nvidia-modelopt[all] --extra-index-url https://pypi.nvidia.com
cd dynamo
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/
popd
2 changes: 1 addition & 1 deletion py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
@@ -893,7 +893,7 @@ def get_attr(self, target: str, args: Any, kwargs: Any) -> np.ndarray:
else:
constant_tensor = frozen_attr

return to_torch(constant_tensor)
return to_torch(constant_tensor)

def call_method(self, target: str, args: Any, kwargs: Any) -> Any:
assert isinstance(target, str)
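The `get_attr` hunk above returns frozen constants through `to_torch()`, which the warning in the next file recommends over a NumPy round-trip for bf16 data. A minimal standalone sketch of why (the tensor here is illustrative, not the PR's code):

```python
import torch

# NumPy has no bfloat16 dtype, so Tensor.numpy() refuses bf16 tensors;
# keeping the constant as a torch.Tensor preserves its precision.
constant_tensor = torch.randn(3, dtype=torch.bfloat16)
try:
    constant_tensor.numpy()
except TypeError:
    pass  # "Got unsupported ScalarType BFloat16"
assert constant_tensor.dtype == torch.bfloat16  # intact via the torch path
```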
63 changes: 33 additions & 30 deletions py/torch_tensorrt/dynamo/conversion/converter_utils.py
@@ -590,42 +590,45 @@ def to_numpy(
Returns:
A Numpy array or None, if the input was None.
"""
output = None
with unset_fake_temporarily():
output = None

if value is None or isinstance(value, np.ndarray):
output = value
if value is None or isinstance(value, np.ndarray):
output = value

elif isinstance(value, torch.Tensor):
if value.is_quantized:
value = value.dequantize()
elif value.dtype == torch.bfloat16:
# TODO: Remove when numpy has a BF16 type
_LOGGER.warning(
"Requested a conversion of bfloat16 tensor from torch to numpy which isn't supported. Casting this tensor to FP32 precision currently. Please use to_torch() API for better data representation",
)
value = value.to(torch.float)

output = value.cpu().detach().contiguous().numpy()
elif isinstance(value, torch.Tensor):
if value.is_quantized:
value = value.dequantize()
elif value.dtype == torch.bfloat16:
# TODO: Remove when numpy has a BF16 type
_LOGGER.warning(
"Requested a conversion of bfloat16 tensor from torch to numpy which isn't supported. Casting this tensor to FP32 precision currently. Please use to_torch() API for better data representation",
)
value = value.to(torch.float)

elif isinstance(value, int):
output = np.array([value], dtype=np.int32)
output = value.cpu().detach().contiguous().numpy()

elif isinstance(value, float):
output = np.array([value], dtype=np.float32)
elif isinstance(value, int):
output = np.array([value], dtype=np.int32)

elif isinstance(value, bool):
output = np.array([value], dtype=np.bool_)
elif isinstance(value, float):
output = np.array([value], dtype=np.float32)

if isinstance(output, np.ndarray) or output is None:
return (
output
if (dtype is None or output is None)
else output.astype(_enums.dtype._from(dtype).to(np.dtype, use_default=True))
)
else:
raise AssertionError(
f"to_numpy can only be called on None, bool, int, float, np.ndarray, or torch.Tensor, got: {value}"
)
elif isinstance(value, bool):
output = np.array([value], dtype=np.bool_)

if isinstance(output, np.ndarray) or output is None:
return (
output
if (dtype is None or output is None)
else output.astype(
_enums.dtype._from(dtype).to(np.dtype, use_default=True)
)
)
else:
raise AssertionError(
f"to_numpy can only be called on None, bool, int, float, np.ndarray, or torch.Tensor, got: {value}"
)


def to_torch(
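The `to_numpy` rewrite above moves the whole body under `unset_fake_temporarily()` and keeps the bf16 fallback: NumPy has no BF16 type, so bf16 tensors are upcast to FP32 before conversion. A standalone sketch of that fallback path (illustrative values):

```python
import numpy as np
import torch

value = torch.randn(4, dtype=torch.bfloat16)
if value.dtype == torch.bfloat16:
    # Mirrors the warning path in to_numpy: cast to FP32 first,
    # since NumPy cannot represent bfloat16.
    value = value.to(torch.float)
output = value.cpu().detach().contiguous().numpy()
assert output.dtype == np.float32
```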
8 changes: 1 addition & 7 deletions tests/py/dynamo/backend/test_backend_compiler.py
@@ -2,11 +2,10 @@
from copy import deepcopy

import torch
import torch_tensorrt
from torch.testing._internal.common_utils import TestCase, run_tests
from torch_tensorrt.dynamo.partitioning import fast_partition

import torch_tensorrt

from ..testing_utilities import DECIMALS_OF_AGREEMENT, lower_graph_testing


@@ -51,7 +50,6 @@ def forward(self, x, y):
pass_through_build_failures=True,
torch_executed_ops={"torch.ops.aten.add.Tensor"},
use_python_runtime=False,
debug=True,
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
torch_model_results = fx_graph(*inputs).detach().cpu()
@@ -132,7 +130,6 @@ def forward(self, x, y):
pass_through_build_failures=True,
torch_executed_ops={"torch.ops.aten.add.Tensor"},
use_python_runtime=False,
debug=True,
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
torch_model_results = model(*inputs).detach().cpu()
@@ -177,7 +174,6 @@ def forward(self, x, y):
optimization_level=4,
version_compatible=True,
max_aux_streams=5,
debug=True,
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
torch_model_results = fx_graph(*inputs).detach().cpu()
@@ -225,7 +221,6 @@ def forward(self, x, y):
min_block_size=1,
pass_through_build_failures=True,
truncate_double=True,
debug=True,
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
torch_model_results = fx_graph(*inputs).detach().cpu()
@@ -298,7 +293,6 @@ def forward(self, x, y):
min_block_size=1,
pass_through_build_failures=True,
truncate_double=False,
debug=True,
torch_executed_ops={"torch.ops.aten.add.Tensor"},
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
2 changes: 0 additions & 2 deletions tests/py/dynamo/conversion/harness.py
@@ -415,7 +415,6 @@ def run_test(
compilation_settings = CompilationSettings(
enabled_precisions={dtype._from(precision)},
truncate_double=True,
debug=True,
immutable_weights=immutable_weights,
)

@@ -507,7 +506,6 @@ def run_test_compare_tensor_attributes_only(
compilation_settings = CompilationSettings(
enabled_precisions={dtype._from(precision)},
truncate_double=True,
debug=True,
immutable_weights=immutable_weights,
)

1 change: 0 additions & 1 deletion tests/py/dynamo/models/test_dtype_support.py
@@ -297,7 +297,6 @@ def forward(self, x):
ir="torch_compile",
inputs=inputs,
enabled_precisions={torch.bfloat16},
debug=True,
min_block_size=1,
device=device,
cache_built_engines=False,
3 changes: 1 addition & 2 deletions tests/py/dynamo/models/test_models_export.py
@@ -254,7 +254,6 @@ def calibrate_loop(model):

@unittest.skipIf(
platform.system() != "Linux"
or torch.cuda.get_device_capability() < (8, 9)
or not importlib.util.find_spec("modelopt")
or Version(metadata.version("nvidia-modelopt")) < Version("0.17.0"),
"modelopt 0.17.0 or later is required, Int8 quantization is supported in modelopt since 0.17.0 or later for linux",
@@ -290,7 +289,7 @@ def calibrate_loop(model):

with torch.no_grad():
with export_torch_mode():
exp_program = torch.export.export(model, (input_tensor,))
exp_program = torch.export.export(model, (input_tensor,), strict=False)
trt_model = torchtrt.dynamo.compile(
exp_program,
inputs=[input_tensor],
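The functional change above is `strict=False` on `torch.export.export`, which selects non-strict export: tracing with regular Python semantics, which tends to tolerate wrappers such as modelopt's quantization hooks better than the strict TorchDynamo-based tracer. A minimal sketch of the call (the toy module is illustrative):

```python
import torch

class ToyModel(torch.nn.Module):  # stand-in for the quantized model under test
    def forward(self, x):
        return torch.relu(x)

input_tensor = torch.randn(1, 3)
# Non-strict export: graph capture without the strict Dynamo guards.
exp_program = torch.export.export(ToyModel(), (input_tensor,), strict=False)
```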
2 changes: 0 additions & 2 deletions tests/py/requirements.txt
@@ -10,5 +10,3 @@ pyyaml
timm>=1.0.3
flashinfer-python; python_version < "3.13"
transformers==4.49.0
nvidia-modelopt[deploy,hf,torch]~=0.17.0; python_version < "3.13"
--extra-index-url https://pypi.nvidia.com