feat: Implement Input class support for FX backend. #1763

Closed · wants to merge 4 commits · Changes from 1 commit
core/runtime/TRTEngine.cpp — 2 changes: 1 addition & 1 deletion

```diff
@@ -148,10 +148,10 @@ TRTEngine::TRTEngine(
 }
 
 TRTEngine::~TRTEngine() {
-  rt.reset();
   trt_engine_profiler.reset();
   exec_ctx.reset();
   cuda_engine.reset();
+  rt.reset();
 }
 
 void TRTEngine::disable_profiling() {
```
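(Presumably the motivation for the reordering: TensorRT requires the `IRuntime` to outlive the engines and execution contexts it deserialized, so `rt` is now released last rather than first during teardown.)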
examples/fx/lower_example.py — 31 changes: 20 additions & 11 deletions

```diff
@@ -4,6 +4,7 @@
 
 import torch
 import torchvision
+import torch_tensorrt
 from torch_tensorrt.fx import compile
 from torch_tensorrt.fx.utils import LowerPrecision
@@ -98,13 +99,17 @@ def benchmark(
 
     model = model.cuda().eval()
     inputs = [x.cuda() for x in inputs]
-
+    # inputs = [torch_tensorrt.Input(shape=(128, 3, 224, 224), dtype=torch.float32)]
```

> **Collaborator:** What are these comments for?

```diff
+    # inputs = [torch_tensorrt.Input(min_shape=[1, 3, 224, 224],
+    #                                opt_shape=[8, 3, 224, 224],
+    #                                max_shape=[32, 3, 224, 224],
+    #                                dtype=torch.float32)]
     # benchmark base configuration
     conf = Configuration(batch_iter=batch_iter, batch_size=batch_size)
 
     configurations = [
         # Baseline
-        replace(conf, name="CUDA Eager", trt=False),
+        # replace(conf, name="CUDA Eager", trt=False),
         # FP32
         replace(
             conf,
@@ -115,14 +120,14 @@ def benchmark(
             accuracy_rtol=1e-3,
         ),
         # FP16
-        replace(
-            conf,
-            name="TRT FP16 Eager",
-            trt=True,
-            jit=False,
-            fp16=True,
-            accuracy_rtol=1e-2,
-        ),
+        # replace(
+        #     conf,
+        #     name="TRT FP16 Eager",
+        #     trt=True,
+        #     jit=False,
+        #     fp16=True,
+        #     accuracy_rtol=1e-2,
+        # ),
     ]
 
     results = [
@@ -189,8 +194,12 @@ def run_configuration_benchmark(
             max_batch_size=conf.batch_size,
             lower_precision=LowerPrecision.FP16 if conf.fp16 else LowerPrecision.FP32,
             explicit_batch_dimension=True,
+            dynamic_batch=False,
         )
-        time = benchmark_torch_function(conf.batch_iter, lambda: lowered_module(*input))
+        random_inputs = [torch.randn((128, 3, 224, 224), dtype=torch.float32).cuda()]
+        time = benchmark_torch_function(
+            conf.batch_iter, lambda: lowered_module(*random_inputs)
+        )
    else:
        print("Lowering with JIT is not available!", "red")
```
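For context on the commented-out lines flagged above: they exercise the two `torch_tensorrt.Input` construction modes this PR wires into the FX backend. A minimal sketch (not part of the diff; it assumes the `Input` signature and the `example_tensor` behavior shown in this PR):

```python
import torch
import torch_tensorrt

# Static shape: one fixed input size.
static_spec = torch_tensorrt.Input(shape=(128, 3, 224, 224), dtype=torch.float32)

# Dynamic shape: a min/opt/max optimization profile over the batch dimension.
dynamic_spec = torch_tensorrt.Input(
    min_shape=[1, 3, 224, 224],
    opt_shape=[8, 3, 224, 224],
    max_shape=[32, 3, 224, 224],
    dtype=torch.float32,
)

# example_tensor() materializes a random tensor matching the spec; a dynamic
# spec must be told which profile field to realize.
x = static_spec.example_tensor()
y = dynamic_spec.example_tensor(optimization_profile_field="opt_shape")
```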
examples/fx/lower_example_aten.py — 20 changes: 14 additions & 6 deletions

```diff
@@ -4,6 +4,7 @@
 
 import torch
 import torchvision
+import torch_tensorrt
 from torch_tensorrt.fx import compile
 from torch_tensorrt.fx.utils import LowerPrecision
@@ -97,21 +98,25 @@ def benchmark(
     """
 
     model = model.cuda().eval()
-    inputs = [x.cuda() for x in inputs]
-
+    # inputs = [x.cuda() for x in inputs]
+    inputs = [torch_tensorrt.Input(shape=(128, 3, 224, 224), dtype=torch.float32)]
+    # inputs = [torch_tensorrt.Input(min_shape=[1, 3, 224, 224],
+    #                                opt_shape=[8, 3, 224, 224],
+    #                                max_shape=[32, 3, 224, 224],
+    #                                dtype=torch.float32)]
     # benchmark base configuration
     conf = Configuration(batch_iter=batch_iter, batch_size=batch_size)
 
     configurations = [
         # Baseline
-        replace(conf, name="CUDA Eager", trt=False),
+        # replace(conf, name="CUDA Eager", trt=False),
         # FP16
         replace(
             conf,
-            name="TRT FP16 Eager",
+            name="TRT FP32 Eager",
             trt=True,
             jit=False,
-            fp16=True,
+            fp16=False,
             accuracy_rtol=1e-2,
         ),
     ]
@@ -182,7 +187,10 @@ def run_configuration_benchmark(
             explicit_batch_dimension=True,
             is_aten=True,
         )
-        time = benchmark_torch_function(conf.batch_iter, lambda: lowered_module(*input))
+        random_inputs = [torch.randn((128, 3, 224, 224), dtype=torch.float32).cuda()]
+        time = benchmark_torch_function(
+            conf.batch_iter, lambda: lowered_module(*random_inputs)
+        )
    else:
        print("Lowering with JIT is not available!", "red")
```
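Both examples time the lowered module through `benchmark_torch_function`. A simplified sketch of what such a helper does — the assumed signature `(iters, fn) -> seconds` matches its use above, but this is not the repo's implementation:

```python
import time

import torch


def benchmark_torch_function(iters: int, f) -> float:
    """Average wall-clock seconds per call of `f` over `iters` iterations."""
    f()  # warm-up: trigger lazy CUDA init and autotuning before timing
    torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in range(iters):
        f()
    torch.cuda.synchronize()  # drain queued CUDA work before stopping the clock
    return (time.perf_counter() - start) / iters
```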
py/setup.py — 6 changes: 5 additions & 1 deletion

```diff
@@ -350,6 +350,7 @@ def run(self):
 if FX_ONLY:
     ext_modules = None
     packages = [
+        "torch_tensorrt",
         "torch_tensorrt.fx",
         "torch_tensorrt.fx.converters",
         "torch_tensorrt.fx.passes",
@@ -358,6 +359,7 @@ def run(self):
         "torch_tensorrt.fx.tracer.dispatch_tracer",
     ]
     package_dir = {
+        "torch_tensorrt": "torch_tensorrt/",
         "torch_tensorrt.fx": "torch_tensorrt/fx",
         "torch_tensorrt.fx.converters": "torch_tensorrt/fx/converters",
         "torch_tensorrt.fx.passes": "torch_tensorrt/fx/passes",
@@ -437,7 +439,9 @@ def run(self):
             "bin/*",
             "BUILD",
             "WORKSPACE",
-        ],
+        ]
+        if not FX_ONLY
+        else ["_Input.py"]
     },
     exclude_package_data={
         "": ["*.cpp"],
```
py/torch_tensorrt/_Input.py — 65 changes: 9 additions & 56 deletions

```diff
@@ -4,7 +4,6 @@
 import torch
 
 from torch_tensorrt import _enums
-from torch_tensorrt import _C
 
 
 class Input(object):
@@ -41,6 +40,7 @@ class _ShapeMode(Enum):
     DOMAIN_OFFSET = 2.0
     low_tensor_domain_incl = 0.0
     high_tensor_domain_excl = low_tensor_domain_incl + DOMAIN_OFFSET
+    torch_dtype = None
```

> **Collaborator:** Should we derive torch_dtype from self.dtype?
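One way to do what this comment suggests: map the parsed `_enums.dtype` back to a `torch.dtype` instead of caching the raw kwarg. A sketch — the mapping table is an assumption, not code from this PR:

```python
import torch
from torch_tensorrt import _enums

# Hypothetical reverse of Input._parse_dtype, covering common enum members.
_ENUM_TO_TORCH_DTYPE = {
    _enums.dtype.float: torch.float32,
    _enums.dtype.half: torch.float16,
    _enums.dtype.int8: torch.int8,
    _enums.dtype.int32: torch.int32,
    _enums.dtype.bool: torch.bool,
}


def _torch_dtype_from_enum(dtype) -> torch.dtype:
    # Fall back to float32 when the enum value has no known torch equivalent.
    return _ENUM_TO_TORCH_DTYPE.get(dtype, torch.float32)
```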


```diff
 
     def __init__(self, *args, **kwargs):
         """__init__ Method for torch_tensorrt.Input
@@ -138,6 +138,9 @@ def __init__(self, *args, **kwargs):
             )
 
         if "dtype" in kwargs:
+            if isinstance(kwargs["dtype"], torch.dtype):
+                self.torch_dtype = kwargs["dtype"]
+
             self.dtype = Input._parse_dtype(kwargs["dtype"])
             self._explicit_set_dtype = True
@@ -173,59 +176,6 @@ def __str__(self) -> str:
         else:
             raise RuntimeError("Unknown input shape mode")
 
-    def _to_internal(self) -> _C.Input:
```

> **Collaborator:** Why was this taken out?

```diff
-        internal_in = _C.Input()
-        if self.shape_mode == Input._ShapeMode.DYNAMIC:
-            if not Input._supported_input_size_type(self.shape["min_shape"]):
-                raise TypeError(
-                    "Input shape specifications for inputs are required to be a List, tuple or torch.Size, found type: "
-                    + str(type(self.shape["min_shape"]))
-                    + " for min_shape"
-                )
-            else:
-                internal_in.min = self.shape["min_shape"]
-
-            if not Input._supported_input_size_type(self.shape["opt_shape"]):
-                raise TypeError(
-                    "Input shape specifications for inputs are required to be a List, tuple or torch.Size, found type: "
-                    + str(type(self.shape["opt_shape"]))
-                    + " for opt_shape"
-                )
-            else:
-                internal_in.opt = self.shape["opt_shape"]
-
-            if not Input._supported_input_size_type(self.shape["max_shape"]):
-                raise TypeError(
-                    "Input shape specifications for inputs are required to be a List, tuple or torch.Size, found type: "
-                    + str(type(self.shape["max_shape"]))
-                    + " for max_shape"
-                )
-            else:
-                internal_in.max = self.shape["max_shape"]
-            internal_in.input_is_dynamic = True
-        else:
-            if not Input._supported_input_size_type(self.shape):
-                raise TypeError(
-                    "Input shape specifications for inputs are required to be a List, tuple or torch.Size, found type: "
-                    + str(type(self.shape))
-                    + " for shape"
-                )
-            else:
-                internal_in.opt = self.shape
-                internal_in.input_is_dynamic = False
-
-        if self.dtype != _enums.dtype.unknown:
-            self._explicit_set_dtype = True
-        else:
-            self._explicit_set_dtype = False
-
-        internal_in.dtype = Input._parse_dtype(self.dtype)
-        internal_in._explicit_set_dtype = self._explicit_set_dtype
-        internal_in.format = Input._parse_format(self.format)
-
-        internal_in.tensor_domain = Input._parse_tensor_domain(self.tensor_domain)
-        return internal_in
-
     @staticmethod
     def _supported_input_size_type(input_size: Any) -> bool:
         if isinstance(input_size, torch.Size):
@@ -304,6 +254,7 @@ def _parse_tensor_domain(domain: Optional[Tuple[float, float]]) -> Tuple:
                 Input.low_tensor_domain_incl,
                 Input.high_tensor_domain_excl,
             )
+
         elif len(domain) == 2:
             domain_lo, domain_hi = domain
@@ -416,8 +367,10 @@ def example_tensor(self, optimization_profile_field: str = None) -> torch.Tensor
             )
 
         if self.shape_mode == Input._ShapeMode.STATIC:
-            return torch.randn(self.shape).to(dtype=self.dtype)
+            return torch.randn(self.shape).to(
+                dtype=self.dtype if not self.torch_dtype else self.torch_dtype
+            )
         else:
             return torch.randn(self.shape[optimization_profile_field]).to(
-                dtype=self.dtype
+                dtype=self.dtype if not self.torch_dtype else self.torch_dtype
             )
```
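With the change above, passing a `torch.dtype` caches it in `torch_dtype`, and `example_tensor()` prefers it over the parsed enum. A quick illustration under that assumption:

```python
import torch
import torch_tensorrt

spec = torch_tensorrt.Input(shape=(2, 3, 4), dtype=torch.float16)
t = spec.example_tensor()
# The cached torch.dtype wins over the parsed enum value.
assert t.shape == (2, 3, 4) and t.dtype == torch.float16
```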
py/torch_tensorrt/_compile.py — 3 changes: 1 addition & 2 deletions

```diff
@@ -142,9 +142,8 @@ def compile(
         return torch_tensorrt.fx.compile(
             module,
             inputs,
-            lower_precision=lower_precision,
-            max_batch_size=inputs[0].size(0),
             explicit_batch_dimension=True,
+            lower_precision=lower_precision,
+            dynamic_batch=False,
             **kwargs,
         )
```
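For reference, this branch is reached through the top-level entry point when `ir="fx"` is selected; a usage sketch (the model and input values are illustrative):

```python
import torch
import torchvision
import torch_tensorrt

model = torchvision.models.resnet18().cuda().eval()
inputs = [torch.randn(8, 3, 224, 224).cuda()]

# Dispatches to torch_tensorrt.fx.compile with the kwargs shown in the diff.
trt_model = torch_tensorrt.compile(model, ir="fx", inputs=inputs)
```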
py/torch_tensorrt/fx/__init__.py — 2 changes: 1 addition & 1 deletion

```diff
@@ -8,7 +8,7 @@
     tensorrt_converter,
 )
 from .fx2trt import TRTInterpreter, TRTInterpreterResult  # noqa
-from .input_tensor_spec import generate_input_specs, InputTensorSpec  # noqa
+from .input_tensor_spec import InputTensorSpec  # noqa
 from .lower_setting import LowerSetting  # noqa
 from .trt_module import TRTModule  # noqa
 from .lower import compile  # usort: skip #noqa
```
py/torch_tensorrt/fx/fx2trt.py — 3 changes: 0 additions & 3 deletions

```diff
@@ -153,7 +153,6 @@ def validate_conversion(self):
 
     def run(
         self,
-        max_batch_size=64,
```

> **Contributor:** I'm afraid we can't make this change. We have to maintain backward compatibility on the API; otherwise, it will break our internal product.

```diff
         max_workspace_size=1 << 25,
         lower_precision=LowerPrecision.FP16,
         sparse_weights=False,
@@ -167,7 +166,6 @@ def run(
         """
         Build TensorRT engine with some configs.
         Args:
-            max_batch_size: set accordingly for maximum batch size you will use.
             max_workspace_size: set to the maximum size we can afford for temporary buffer
             lower_precision: the precision model layers are running on (TensorRT will choose the best perforamnce precision).
             sparse_weights: allow the builder to examine weights and use optimized functions when weights have suitable sparsity
@@ -207,7 +205,6 @@ def run(
         )
         build_engine_start_time = datetime.now()
 
-        self.builder.max_batch_size = max_batch_size
         builder_config = self.builder.create_builder_config()
         builder_config.max_workspace_size = max_workspace_size
 
```
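A middle ground that would address this concern (a sketch, not part of the PR): keep accepting `max_batch_size` as a deprecated no-op, since explicit-batch engines no longer consult `builder.max_batch_size`:

```python
import warnings


def run(self, max_batch_size=None, max_workspace_size=1 << 25, **kwargs):
    if max_batch_size is not None:
        warnings.warn(
            "max_batch_size is deprecated and ignored; engines are built "
            "with an explicit batch dimension.",
            DeprecationWarning,
        )
    # ... proceed without touching self.builder.max_batch_size ...
```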