[fx2trt] Enable int8 in lower_to_trt (#21)

Mengchi Zhang · Wei Wei · commit 0d2cd7105a12 · 2022-06-03T17:54:11.000-07:00
Summary: Pull Request resolved: https://github.com/pytorch/fx2trt/pull/21

Reviewed By: jasonjk-park, yinghai

Differential Revision: D34916991

fbshipit-source-id: c088b4d6fe40444e13433a6eac76bcbd0fa078e6
diff --git a/fx/fx2trt.py b/fx/fx2trt.py
@@ -155,6 +155,8 @@ def run(
         timing_cache=None,
         profiling_verbosity=None,
     ) -> TRTInterpreterResult:
+        assert not (fp16_mode and int8_mode), "We cannot enable both fp16 and int8 mode."
+
         TRT_INTERPRETER_CALL_PRE_OBSERVER.observe(self.module)
 
         # For float outputs, we set their dtype to fp16 only if fp16_mode=True and
@@ -193,7 +195,6 @@ def run(
             builder_config.set_flag(trt.BuilderFlag.INT8)
 
         if sparse_weights:
-            assert fp16_mode or int8_mode, "We can only enable sparsity in fp16 or int8 mode."
             builder_config.set_flag(trt.BuilderFlag.SPARSE_WEIGHTS)
 
         if strict_type_constraints:
diff --git a/fx/lower.py b/fx/lower.py
@@ -80,6 +80,7 @@ def lower_to_trt(
     max_workspace_size=1 << 25,
     explicit_batch_dimension=False,
     fp16_mode=True,
+    int8_mode=False,
     verbose_log=False,
     timing_cache_prefix="",
     save_timing_cache=False,
@@ -96,6 +97,7 @@ def lower_to_trt(
         max_workspace_size: Maximum size of workspace given to TensorRT.
         explicit_batch_dimension: Use explicit batch dimension in TensorRT if set True, otherwise use implicit batch dimension.
         fp16_mode: fp16 config given to TRTModule.
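+        int8_mode: int8 config given to TRTModule. run() in fx2trt.py asserts that fp16_mode and int8_mode are not both set, and fp16_mode defaults to True here, so pass fp16_mode=False when enabling int8_mode.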
         verbose_log: Enable verbose log for TensorRT if set True.
         timing_cache_prefix: Timing cache file name for timing cache used by fx2trt.
         save_timing_cache: Update timing cache with current timing cache data if set to True.
@@ -109,6 +111,7 @@ def lower_to_trt(
         max_workspace_size=max_workspace_size,
         explicit_batch_dimension=explicit_batch_dimension,
         fp16_mode=fp16_mode,
+        int8_mode=int8_mode,
         verbose_log=verbose_log,
         timing_cache_prefix=timing_cache_prefix,
         save_timing_cache=save_timing_cache,