
Commit b6c8549

pruthvistony authored and jithunnair-amd committed
Consolidated unit test skips and unskips
========================================

- Temporarily skip test_conv3d_64bit_indexing - rocBLAS API support is requested - SWDEV-383635 & sub-task SWDEV-390218
- Skip ddp apply_optim_in_bwd tests for gloo (#1302)
  To resolve https://ontrack-internal.amd.com/browse/SWDEV-403530 and https://ontrack-internal.amd.com/browse/SWDEV-419837.
  For more context, see upstream issue pytorch#111834.
- Add skipIfRocmArch decorator for Navi skips (#1356)
- Converted NAVI check as a function (#1364)
  * Moved NAVI check to the test file
  * Revised NAVI check as a function
- [Navi] [Inductor] Unskip Navi inductor UTs (#1514)
  Relates to https://ontrack-internal.amd.com/browse/SWDEV-461590
- Bad import in test_torchinductor and skip torchvision-related UT (#1374)
- Skip test_inductor_freezing failing UTs (#1375)
- Skip test_mm_triton_kernel_benchmark (#1376)
  * Running the Triton kernel on ROCm reports only one GB/s metric
  * Update test_kernel_benchmark.py
- Skip vmapvjpvjp_linalg_householder_product_cuda_float32 (#1420)
- skipIfRocm needs msg parameter
- [NO CP] Updated changes to skip a few UTs
- Imported skipIfRocm in certain test suites (#1577)
  Fixes SWDEV-472397
- Added functions imports (#1521)
  Fixes inductor.test_torchinductor_dynamic_shapes::TestInductorDynamicCUDA::test_item_unbacked_stride_nobreak_cuda
1 parent b1b9ef4 commit b6c8549

13 files changed: +79 -10 lines
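The commit message above references skipIfRocm's msg parameter. As a minimal sketch (not part of the commit, assuming the upstream torch.testing._internal.common_utils semantics), the decorator can be applied either bare or with a msg= keyword:

# Sketch only: illustrates the two skipIfRocm forms used in this commit.
# TestCase and run_tests come from torch.testing._internal.common_utils.
from torch.testing._internal.common_utils import run_tests, skipIfRocm, TestCase


class ExampleSkips(TestCase):
    @skipIfRocm  # bare form: skipped on any ROCm build, default message
    def test_bare_skip(self):
        pass

    @skipIfRocm(msg="TODO: temp skip on ROCm 6.2")  # msg form used throughout this commit
    def test_skip_with_msg(self):
        pass


if __name__ == "__main__":
    run_tests()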

test/dynamo/test_structured_trace.py

Lines changed: 2 additions & 1 deletion

@@ -19,7 +19,7 @@
 from torch._inductor.test_case import TestCase
 from torch._logging._internal import TorchLogsFormatter
 from torch.nn.parallel import DistributedDataParallel as DDP
-from torch.testing._internal.common_utils import find_free_port
+from torch.testing._internal.common_utils import find_free_port, skipIfRocm
 from torch.testing._internal.inductor_utils import HAS_CUDA

@@ -192,6 +192,7 @@ def test_schedule(self):
         self.assertParses()

     @requires_cuda
+    @skipIfRocm(msg="TODO: temp skip on ROCm 6.2")
     def test_cudagraphs(self):
         fn_opt = torch.compile(mode="reduce-overhead")(inductor_schedule_fn)
         fn_opt(torch.ones(1000, 1000, device="cuda"))

test/functorch/test_ops.py

Lines changed: 1 addition & 1 deletion

@@ -951,7 +951,7 @@ def fn(inp, *args, **kwargs):
     # (3) encountering this error in PyTorch internals.
     xfail("index_reduce", "prod"),
     decorate(
-        "linalg.householder_product", decorator=runOnRocm
+        "linalg.householder_product", decorator=skipIfRocm
     ),  # works on ROCm
     xfail(
         # nans

test/inductor/test_cuda_repro.py

Lines changed: 2 additions & 1 deletion

@@ -31,6 +31,7 @@
     freeze_rng_state,
     IS_FBCODE,
     skipIfRocm,
+    skipIfRocmArch,
     TEST_WITH_ASAN,
 )
 from torch.testing._internal.inductor_utils import skipCUDAIf

@@ -52,7 +53,7 @@
         sys.exit(0)
     raise

-
+NAVI_ARCH = ("gfx1100", "gfx1101")  # Used for navi exclusive skips on ROCm
 TestCase = test_torchinductor.TestCase
 ToTuple = test_torchinductor.ToTuple
 check_model_cuda = test_torchinductor.check_model_cuda

test/inductor/test_inductor_freezing.py

Lines changed: 17 additions & 0 deletions

@@ -23,6 +23,23 @@
 pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 sys.path.append(pytorch_test_dir)

+from torch.testing._internal.common_utils import (
+    IS_CI,
+    IS_WINDOWS,
+    TEST_WITH_ASAN,
+    TEST_WITH_ROCM,
+    skipIfRocm,
+)
+
+
+if IS_WINDOWS and IS_CI:
+    sys.stderr.write(
+        "Windows CI does not have necessary dependencies for test_torchinductor yet\n"
+    )
+    if __name__ == "__main__":
+        sys.exit(0)
+    raise unittest.SkipTest("requires sympy/functorch/filelock")
+
 from inductor.test_torchinductor import check_model, check_model_cuda, copy_tests
 from torch.testing._internal.common_utils import TEST_WITH_ASAN, TEST_WITH_ROCM

test/inductor/test_kernel_benchmark.py

Lines changed: 2 additions & 1 deletion

@@ -15,7 +15,7 @@
 from torch.testing import FileCheck
 from torch.testing._internal.common_device_type import expectedFailureXPU
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU
-
+from torch.testing._internal.common_utils import skipIfRocm


 class TestKernelBenchmark(TestCase):
     device_type = GPU_TYPE

@@ -136,6 +136,7 @@ def f(a, b):
     @expectedFailureXPU
     @config.patch(max_autotune=True, max_autotune_gemm_backends="TRITON")
     @fresh_inductor_cache()
+    @skipIfRocm  # This seems to be disabled upstream https://github.com/pytorch/pytorch/issues/118346
     def test_mm_triton_kernel_benchmark(self):
         M = 2048
         N = 2432

test/inductor/test_torchinductor.py

Lines changed: 10 additions & 0 deletions

@@ -70,6 +70,7 @@
 from torch.testing._internal.common_device_type import (
     _has_sufficient_memory,
     expectedFailureXPU,
+    get_desired_device_type_test_bases,
 )
 from torch.testing._internal.common_dtype import all_types, get_all_dtypes
 from torch.testing._internal.common_utils import (

@@ -85,6 +86,8 @@
     skipIfWindows,
     skipIfXpu,
     subtest,
+    skipIfRocmArch,
+    subtest,
     TEST_WITH_ASAN,
     TEST_WITH_ROCM,
 )

@@ -119,6 +122,10 @@


 HAS_AVX2 = "fbgemm" in torch.backends.quantized.supported_engines
+_desired_test_bases = get_desired_device_type_test_bases()
+RUN_CPU = any(getattr(x, "device_type", "") == "cpu" for x in _desired_test_bases)
+RUN_GPU = any(getattr(x, "device_type", "") == GPU_TYPE for x in _desired_test_bases)
+NAVI_ARCH = ("gfx1100", "gfx1101")  # Used for navi exclusive skips on ROCm

 aten = torch.ops.aten

@@ -6847,6 +6854,8 @@ def fn(in_ptr0, in_ptr1, in_ptr2):
             ),
         )

+    @skipIfWindows
+    @skipIfRocm
     def test_roi_align(self):
         if not has_torchvision_roi_align():
             raise unittest.SkipTest("requires torchvision")

@@ -7686,6 +7695,7 @@ def fn(a, dim, index, b, reduce):
         )

     @skip_if_gpu_halide
+    # issue #1150
     def test_dense_mask_index(self):
         r"""
         There will be a little difference for reduce order between aten and inductor
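The new RUN_CPU / RUN_GPU flags are defined here, but their consumers fall outside this hunk. A plausible tail-of-file guard, assuming the fork keeps upstream's run_tests pattern (a sketch, not shown in the diff):

# Sketch (assumption): RUN_CPU / RUN_GPU gating the runner at the bottom of
# test_torchinductor.py, mirroring the upstream HAS_CPU / HAS_GPU guard.
if __name__ == "__main__":
    from torch._inductor.test_case import run_tests

    if RUN_CPU or RUN_GPU:
        run_tests(needs="filelock")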

test/inductor/test_torchinductor_dynamic_shapes.py

Lines changed: 2 additions & 0 deletions

@@ -32,6 +32,7 @@
     TEST_CUDA_MEM_LEAK_CHECK,
     TEST_WITH_ASAN,
     TEST_WITH_ROCM,
+    skipIfRocm,
 )
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CPU, HAS_GPU

@@ -241,6 +242,7 @@ def fn(x, y):
         self.assertEqual(r, opt_r)

     @torch._dynamo.config.patch(capture_scalar_outputs=True)
+    @skipIfRocm(msg="TODO: temp skip on ROCm 6.2")
     def test_unwrap_storage_didnt_work_repro(self, device):
         def f():
             full = torch.full((), 11)

test/nn/test_convolution.py

Lines changed: 5 additions & 2 deletions

@@ -53,6 +53,7 @@
     parametrize as parametrize_test,
     run_tests,
     set_default_dtype,
+    skipIfRocm,
     skipIfNotMiopenSuggestNHWC,
     skipIfRocmVersionLessThan,
     subtest,

@@ -4022,8 +4023,10 @@ def test_conv_double_backward_strided_with_3D_input_and_weight(self, device):
         self.assertEqual(grad_weight.shape, weight.shape)

     @onlyCUDA
-    @largeTensorTest("40GB")
-    @largeTensorTest("24GB", "cpu")
+    @largeTensorTest('40GB')
+    @largeTensorTest('24GB', 'cpu')
+    # Skipped for ROCm temp - https://ontrack-internal.amd.com/browse/SWDEV-383635
+    @skipIfRocm
     def test_conv3d_64bit_indexing(self, device):
         x = torch.rand(1, 32, 512, 512, 256)
         m = torch.nn.Conv3d(32, 1, kernel_size=1, padding=0, stride=1, bias=False)

test/run_test.py

Lines changed: 3 additions & 0 deletions

@@ -185,6 +185,9 @@ def __contains__(self, item):
     "distributed/_tensor/test_attention",
 ]

+if sys.version_info.major < 3 or (sys.version_info.major == 3 and sys.version_info.minor <= 9):
+    ROCM_BLOCKLIST.append("test_typing")
+
 XPU_BLOCKLIST = [
     "test_autograd",
     "profiler/test_cpp_thread",

test/test_cuda.py

Lines changed: 3 additions & 3 deletions

@@ -1884,9 +1884,8 @@ def test_graph_capture_oom(self):
         with torch.cuda.graph(torch.cuda.CUDAGraph()):
             torch.zeros(2**40, device="cuda")

-    @unittest.skipIf(
-        not TEST_CUDA_GRAPH, "CUDA >= 11.0 or ROCM >= 5.3 required for graphs"
-    )
+    @unittest.skipIf(not TEST_CUDA_GRAPH, "CUDA >= 11.0 or ROCM >= 5.3 required for graphs")
+    @skipIfRocm(msg="TODO: temp skip on ROCm 6.2")
     @serialTest()
     def test_repeat_graph_capture_cublas_workspace_memory(self):
         (x, y, z) = 1024, 512, 64

@@ -2842,6 +2841,7 @@ def forward(self, input_dict: dict):
     @unittest.skipIf(
         not TEST_CUDA_GRAPH, "CUDA >= 11.0 or ROCM >= 5.3 required for graphs"
     )
+    @skipIfRocm(msg="TODO: temp skip on ROCm 6.2")
     def test_graph_make_graphed_callables_same_pool(self):
         torch.manual_seed(5)
         torch.cuda.manual_seed(5)

test/test_fx.py

Lines changed: 2 additions & 0 deletions

@@ -57,6 +57,7 @@
     IS_WINDOWS,
     find_library_location,
     run_tests,
+    skipIfRocm,
     skipIfTorchDynamo,
 )
 from torch.testing._internal.jit_utils import JitTestCase

@@ -4183,6 +4184,7 @@ def test_class_member_back_compat(self):
             f"and subsequently --accept the change."
         raise AssertionError(msg) from e

+    @skipIfRocm(msg="TODO: flaky - https://github.com/pytorch/pytorch/issues/104012")
     def test_public_api_surface(self):
         non_back_compat_objects = {}

torch/testing/_internal/common_utils.py

Lines changed: 21 additions & 0 deletions

@@ -1279,6 +1279,14 @@ def printErrors(self) -> None:
 IS_X86 = platform.machine() in ('x86_64', 'i386')
 IS_ARM64 = platform.machine() in ('arm64', 'aarch64')

+def is_navi_arch():
+    if torch.cuda.is_available():
+        prop = torch.cuda.get_device_properties(0)
+        gfx_arch = prop.gcnArchName.split(":")[0]
+        if gfx_arch in ["gfx1100", "gfx1101", "gfx1102"]:
+            return True
+    return False
+
 def is_avx512_vnni_supported():
     if sys.platform != 'linux':
         return False

@@ -1754,6 +1762,19 @@ def wrapper(*args, **kwargs):
         return dec_fn(func)
     return dec_fn

+def skipIfRocmArch(arch: Tuple[str, ...]):
+    def dec_fn(fn):
+        @wraps(fn)
+        def wrap_fn(self, *args, **kwargs):
+            if TEST_WITH_ROCM:
+                prop = torch.cuda.get_device_properties(0)
+                if prop.gcnArchName.split(":")[0] in arch:
+                    reason = f"skipIfRocm: test skipped on {arch}"
+                    raise unittest.SkipTest(reason)
+            return fn(self, *args, **kwargs)
+        return wrap_fn
+    return dec_fn
+
 def runOnRocm(fn):
     @wraps(fn)
     def wrapper(*args, **kwargs):
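A usage sketch for the two helpers added above (hypothetical test file, not part of the commit): skipIfRocmArch for decorator-level arch skips, and is_navi_arch() for a runtime check inside a test body:

import unittest

import torch
from torch.testing._internal.common_utils import (
    is_navi_arch,
    run_tests,
    skipIfRocmArch,
    TestCase,
)

NAVI_ARCH = ("gfx1100", "gfx1101")  # as defined in the inductor test files above


class NaviSkipExamples(TestCase):
    @skipIfRocmArch(NAVI_ARCH)
    def test_decorator_skip(self):
        # Skipped only when TEST_WITH_ROCM is set and device 0 reports a
        # gcnArchName whose base matches an entry in NAVI_ARCH.
        pass

    def test_runtime_check(self):
        if is_navi_arch():
            raise unittest.SkipTest("not supported on Navi (gfx1100/gfx1101/gfx1102)")
        self.assertEqual(torch.ones(1).item(), 1)


if __name__ == "__main__":
    run_tests()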

torch/testing/_internal/distributed/distributed_test.py

Lines changed: 9 additions & 1 deletion

@@ -4863,7 +4863,11 @@ def _test_ddp_apply_optim_in_backward(
             # set_to_none for regular optimizer to match in backward
             # case.
             optim.zero_grad(set_to_none=True)
-
+
+    @skip_but_pass_in_sandcastle_if(
+        BACKEND == "gloo" and HAS_TORCHVISION,
+        "Failing with gloo backend + torchvision due to ongoing issue https://github.com/pytorch/pytorch/issues/111834",
+    )
     @skip_if_lt_x_gpu(2)
     def test_ddp_apply_optim_in_backward(self):
         for optim_cls, init_before in itertools.product(

@@ -4876,6 +4880,10 @@ def test_ddp_apply_optim_in_backward(self):
                 init_before=init_before,
             )

+    @skip_but_pass_in_sandcastle_if(
+        BACKEND == "gloo" and HAS_TORCHVISION,
+        "Failing with gloo backend + torchvision due to ongoing issue https://github.com/pytorch/pytorch/issues/111834",
+    )
     @skip_if_lt_x_gpu(2)
     def test_ddp_apply_optim_in_backward_grad_as_bucket_view_false(self):
         for init_before in [True, False]:
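Both new gates reference HAS_TORCHVISION, which is defined elsewhere in the file; conventionally it comes from an import probe, roughly as follows (an assumption based on the common pattern, not shown in this diff):

# Assumed definition of HAS_TORCHVISION (common import-probe pattern):
try:
    import torchvision  # noqa: F401

    HAS_TORCHVISION = True
except ImportError:
    HAS_TORCHVISION = False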
