Commit 2399cfb

Revert "Torch TRT ngc container changes (#3299)"

This reverts commit 3982401 (parent: 3982401).

File tree

3 files changed: +4 −28 lines

core/util/Exception.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,3 @@
-#if defined(__GNUC__) && !defined(__clang__)
-#if __GNUC__ >= 13
-#include <cstdint>
-#endif
-#elif defined(__clang__)
-#if __clang_major__ >= 13
-#include <cstdint>
-#endif
-#endif
 #include "core/util/Exception.h"
 
 #include <iostream>

noxfile.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -258,12 +258,11 @@ def run_dynamo_runtime_tests(session):
     tests = [
         "runtime",
     ]
-    skip_tests = "-k not hw_compat"
     for test in tests:
         if USE_HOST_DEPS:
-            session.run_always("pytest", test, skip_tests, env={"PYTHONPATH": PYT_PATH})
+            session.run_always("pytest", test, env={"PYTHONPATH": PYT_PATH})
         else:
-            session.run_always("pytest", test, skip_tests)
+            session.run_always("pytest", test)
 
 
 def run_dynamo_model_compile_tests(session):
@@ -333,6 +332,7 @@ def run_int8_accuracy_tests(session):
     tests = [
         "ptq/test_ptq_to_backend.py",
         "ptq/test_ptq_dataloader_calibrator.py",
+        "qat/",
     ]
     for test in tests:
         if USE_HOST_DEPS:
@@ -473,6 +473,7 @@ def run_l1_int8_accuracy_tests(session):
     install_deps(session)
     install_torch_trt(session)
     train_model(session)
+    finetune_model(session)
     run_int8_accuracy_tests(session)
     cleanup(session)

tests/py/dynamo/lowering/test_aten_lowering_passes.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,10 @@
 
 import torch
 import torch_tensorrt
-from torch.testing._internal.common_cuda import PLATFORM_SUPPORTS_FLASH_ATTENTION
 from torch.testing._internal.common_utils import TestCase, run_tests
 
 from ..testing_utilities import DECIMALS_OF_AGREEMENT, lower_graph_testing
 
-isSM8XDevice = torch.cuda.is_available() and torch.cuda.get_device_capability() in [
-    (8, 6),
-    (8, 7),
-    (8, 9),
-]
-
 
 class TestInputAsOutput(TestCase):
     def test_input_as_output(self):
@@ -286,10 +279,6 @@ def forward(self, q, k, v):
     "Test not supported on Windows",
 )
 class TestLowerFlashAttention(TestCase):
-    @unittest.skipIf(
-        not PLATFORM_SUPPORTS_FLASH_ATTENTION or not isSM8XDevice,
-        "Does not support fused SDPA or not SM86+ hardware",
-    )
     def test_lower_flash_attention(self):
         class FlashAttention(torch.nn.Module):
             def forward(self, q, k, v):
@@ -359,10 +348,6 @@ def forward(self, q, k, v):
         )
         torch._dynamo.reset()
 
-    @unittest.skipIf(
-        not PLATFORM_SUPPORTS_FLASH_ATTENTION or not isSM8XDevice,
-        "Does not support fused SDPA or not SM86+ hardware",
-    )
     def test_flash_attention_converter(self):
         class FlashAttention(torch.nn.Module):
             def forward(self, q, k, v):

0 commit comments

Comments
 (0)