@@ -2,10 +2,17 @@
 
 import torch
 import torch_tensorrt
+from torch.testing._internal.common_cuda import PLATFORM_SUPPORTS_FLASH_ATTENTION
 from torch.testing._internal.common_utils import TestCase, run_tests
 
 from ..testing_utilities import DECIMALS_OF_AGREEMENT, lower_graph_testing
 
+isSM8XDevice = torch.cuda.is_available() and torch.cuda.get_device_capability() in [
+    (8, 6),
+    (8, 7),
+    (8, 9),
+]
+
 
 class TestInputAsOutput(TestCase):
     def test_input_as_output(self):
@@ -274,6 +281,10 @@ def forward(self, q, k, v):
     "GPU compute capability is too low to run flash attention, need Ampere (8.0) or greater",
 )
 class TestLowerFlashAttention(TestCase):
+    @unittest.skipIf(
+        not PLATFORM_SUPPORTS_FLASH_ATTENTION or not isSM8XDevice,
+        "Does not support fused SDPA or not SM86+ hardware",
+    )
     def test_lower_flash_attention(self):
         class FlashAttention(torch.nn.Module):
             def forward(self, q, k, v):
@@ -343,6 +354,10 @@ def forward(self, q, k, v):
         )
         torch._dynamo.reset()
 
+    @unittest.skipIf(
+        not PLATFORM_SUPPORTS_FLASH_ATTENTION or not isSM8XDevice,
+        "Does not support fused SDPA or not SM86+ hardware",
+    )
     def test_flash_attention_converter(self):
         class FlashAttention(torch.nn.Module):
             def forward(self, q, k, v):