
Commit b6c8549

pruthvistony authored and jithunnair-amd committed
Consolidated unit test skips and unskips
========================================

- Temporarily skip test_conv3d_64bit_indexing - rocBLAS API support is requested - SWDEV-383635 & sub-task SWDEV-390218
- Skip ddp apply_optim_in_bwd tests for gloo (#1302)
  To resolve https://ontrack-internal.amd.com/browse/SWDEV-403530 and https://ontrack-internal.amd.com/browse/SWDEV-419837.
  For more context, see upstream issue pytorch#111834.
- Add skipIfRocmArch decorator for Navi skips (#1356)
- Converted NAVI check as a function (#1364)
  * Moved NAVI check to the test file
  * Revised NAVI check as a function
- [Navi] [Inductor] Unskip Navi inductor UTs (#1514)
  Relates to https://ontrack-internal.amd.com/browse/SWDEV-461590
- Bad import in test_torchinductor and skip torchvision-related UT (#1374)
- Skip test_inductor_freezing failing UTs (#1375)
- Skip test_mm_triton_kernel_benchmark (#1376)
  * Running the Triton kernel on ROCm reports only one GB/s metric
  * Update test_kernel_benchmark.py
- Skip vmapvjpvjp_linalg_householder_product_cuda_float32 (#1420)
- skipIfRocm needs msg parameter
- [NO CP] Updated changes to skip a few UTs
- Imported skipIfRocm in certain test suites (#1577)
  Fixes SWDEV-472397
- Added functions imports (#1521)
  Fixes inductor.test_torchinductor_dynamic_shapes::TestInductorDynamicCUDA::test_item_unbacked_stride_nobreak_cuda
1 parent b1b9ef4 commit b6c8549

13 files changed: +79 -10 lines
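The commit message above references skipIfRocm's msg parameter. As a minimal sketch (not part of the commit, assuming the upstream torch.testing._internal.common_utils semantics), the decorator can be applied either bare or with a msg= keyword:

# Sketch only: illustrates the two skipIfRocm forms used in this commit.
# TestCase and run_tests come from torch.testing._internal.common_utils.
from torch.testing._internal.common_utils import run_tests, skipIfRocm, TestCase


class ExampleSkips(TestCase):
    @skipIfRocm  # bare form: skipped on any ROCm build, default message
    def test_bare_skip(self):
        pass

    @skipIfRocm(msg="TODO: temp skip on ROCm 6.2")  # msg form used throughout this commit
    def test_skip_with_msg(self):
        pass


if __name__ == "__main__":
    run_tests()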

test/dynamo/test_structured_trace.py

Lines changed: 2 additions & 1 deletion

@@ -19,7 +19,7 @@
 from torch._inductor.test_case import TestCase
 from torch._logging._internal import TorchLogsFormatter
 from torch.nn.parallel import DistributedDataParallel as DDP
-from torch.testing._internal.common_utils import find_free_port
+from torch.testing._internal.common_utils import find_free_port, skipIfRocm
 from torch.testing._internal.inductor_utils import HAS_CUDA

@@ -192,6 +192,7 @@ def test_schedule(self):
         self.assertParses()

     @requires_cuda
+    @skipIfRocm(msg="TODO: temp skip on ROCm 6.2")
     def test_cudagraphs(self):
         fn_opt = torch.compile(mode="reduce-overhead")(inductor_schedule_fn)
         fn_opt(torch.ones(1000, 1000, device="cuda"))

test/functorch/test_ops.py

Lines changed: 1 addition & 1 deletion

@@ -951,7 +951,7 @@ def fn(inp, *args, **kwargs):
     # (3) encountering this error in PyTorch internals.
     xfail("index_reduce", "prod"),
     decorate(
-        "linalg.householder_product", decorator=runOnRocm
+        "linalg.householder_product", decorator=skipIfRocm
     ),  # works on ROCm
     xfail(
         # nans

test/inductor/test_cuda_repro.py

Lines changed: 2 additions & 1 deletion

@@ -31,6 +31,7 @@
     freeze_rng_state,
     IS_FBCODE,
     skipIfRocm,
+    skipIfRocmArch,
     TEST_WITH_ASAN,
 )
 from torch.testing._internal.inductor_utils import skipCUDAIf

@@ -52,7 +53,7 @@
         sys.exit(0)
     raise

-
+NAVI_ARCH = ("gfx1100", "gfx1101")  # Used for navi exclusive skips on ROCm
 TestCase = test_torchinductor.TestCase
 ToTuple = test_torchinductor.ToTuple
 check_model_cuda = test_torchinductor.check_model_cuda

test/inductor/test_inductor_freezing.py

Lines changed: 17 additions & 0 deletions

@@ -23,6 +23,23 @@
 pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 sys.path.append(pytorch_test_dir)

+from torch.testing._internal.common_utils import (
+    IS_CI,
+    IS_WINDOWS,
+    TEST_WITH_ASAN,
+    TEST_WITH_ROCM,
+    skipIfRocm,
+)
+
+
+if IS_WINDOWS and IS_CI:
+    sys.stderr.write(
+        "Windows CI does not have necessary dependencies for test_torchinductor yet\n"
+    )
+    if __name__ == "__main__":
+        sys.exit(0)
+    raise unittest.SkipTest("requires sympy/functorch/filelock")
+
 from inductor.test_torchinductor import check_model, check_model_cuda, copy_tests
 from torch.testing._internal.common_utils import TEST_WITH_ASAN, TEST_WITH_ROCM

test/inductor/test_kernel_benchmark.py

Lines changed: 2 additions & 1 deletion

@@ -15,7 +15,7 @@
 from torch.testing import FileCheck
 from torch.testing._internal.common_device_type import expectedFailureXPU
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU
-
+from torch.testing._internal.common_utils import skipIfRocm


 class TestKernelBenchmark(TestCase):
     device_type = GPU_TYPE

@@ -136,6 +136,7 @@ def f(a, b):
     @expectedFailureXPU
     @config.patch(max_autotune=True, max_autotune_gemm_backends="TRITON")
     @fresh_inductor_cache()
+    @skipIfRocm  # This seems to be disabled upstream https://github.com/pytorch/pytorch/issues/118346
     def test_mm_triton_kernel_benchmark(self):
         M = 2048
         N = 2432

test/inductor/test_torchinductor.py

Lines changed: 10 additions & 0 deletions

@@ -70,6 +70,7 @@
 from torch.testing._internal.common_device_type import (
     _has_sufficient_memory,
     expectedFailureXPU,
+    get_desired_device_type_test_bases,
 )
 from torch.testing._internal.common_dtype import all_types, get_all_dtypes
 from torch.testing._internal.common_utils import (

@@ -85,6 +86,8 @@
     skipIfWindows,
     skipIfXpu,
     subtest,
+    skipIfRocmArch,
+    subtest,
     TEST_WITH_ASAN,
     TEST_WITH_ROCM,
 )

@@ -119,6 +122,10 @@


 HAS_AVX2 = "fbgemm" in torch.backends.quantized.supported_engines
+_desired_test_bases = get_desired_device_type_test_bases()
+RUN_CPU = any(getattr(x, "device_type", "") == "cpu" for x in _desired_test_bases)
+RUN_GPU = any(getattr(x, "device_type", "") == GPU_TYPE for x in _desired_test_bases)
+NAVI_ARCH = ("gfx1100", "gfx1101")  # Used for navi exclusive skips on ROCm

 aten = torch.ops.aten

@@ -6847,6 +6854,8 @@ def fn(in_ptr0, in_ptr1, in_ptr2):
             ),
         )

+    @skipIfWindows
+    @skipIfRocm
     def test_roi_align(self):
         if not has_torchvision_roi_align():
             raise unittest.SkipTest("requires torchvision")

@@ -7686,6 +7695,7 @@ def fn(a, dim, index, b, reduce):
         )

     @skip_if_gpu_halide
+    # issue #1150
     def test_dense_mask_index(self):
         r"""
         There will be a little difference for reduce order between aten and inductor
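The new RUN_CPU / RUN_GPU flags are defined here, but their consumers fall outside this hunk. A plausible tail-of-file guard, assuming the fork keeps upstream's run_tests pattern (a sketch, not shown in the diff):

# Sketch (assumption): RUN_CPU / RUN_GPU gating the runner at the bottom of
# test_torchinductor.py, mirroring the upstream HAS_CPU / HAS_GPU guard.
if __name__ == "__main__":
    from torch._inductor.test_case import run_tests

    if RUN_CPU or RUN_GPU:
        run_tests(needs="filelock")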

test/inductor/test_torchinductor_dynamic_shapes.py

Lines changed: 2 additions & 0 deletions

@@ -32,6 +32,7 @@
     TEST_CUDA_MEM_LEAK_CHECK,
     TEST_WITH_ASAN,
     TEST_WITH_ROCM,
+    skipIfRocm,
 )
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CPU, HAS_GPU

@@ -241,6 +242,7 @@ def fn(x, y):
         self.assertEqual(r, opt_r)

     @torch._dynamo.config.patch(capture_scalar_outputs=True)
+    @skipIfRocm(msg="TODO: temp skip on ROCm 6.2")
     def test_unwrap_storage_didnt_work_repro(self, device):
         def f():
             full = torch.full((), 11)

test/nn/test_convolution.py

Lines changed: 5 additions & 2 deletions

@@ -53,6 +53,7 @@
     parametrize as parametrize_test,
     run_tests,
     set_default_dtype,
+    skipIfRocm,
     skipIfNotMiopenSuggestNHWC,
     skipIfRocmVersionLessThan,
     subtest,

@@ -4022,8 +4023,10 @@ def test_conv_double_backward_strided_with_3D_input_and_weight(self, device):
         self.assertEqual(grad_weight.shape, weight.shape)

     @onlyCUDA
-    @largeTensorTest("40GB")
-    @largeTensorTest("24GB", "cpu")
+    @largeTensorTest('40GB')
+    @largeTensorTest('24GB', 'cpu')
+    # Skipped for ROCm temp - https://ontrack-internal.amd.com/browse/SWDEV-383635
+    @skipIfRocm
     def test_conv3d_64bit_indexing(self, device):
         x = torch.rand(1, 32, 512, 512, 256)
         m = torch.nn.Conv3d(32, 1, kernel_size=1, padding=0, stride=1, bias=False)

test/run_test.py

Lines changed: 3 additions & 0 deletions

@@ -185,6 +185,9 @@ def __contains__(self, item):
     "distributed/_tensor/test_attention",
 ]

+if sys.version_info.major < 3 or (sys.version_info.major == 3 and sys.version_info.minor <= 9):
+    ROCM_BLOCKLIST.append("test_typing")
+
 XPU_BLOCKLIST = [
     "test_autograd",
     "profiler/test_cpp_thread",

test/test_cuda.py

Lines changed: 3 additions & 3 deletions

@@ -1884,9 +1884,8 @@ def test_graph_capture_oom(self):
         with torch.cuda.graph(torch.cuda.CUDAGraph()):
             torch.zeros(2**40, device="cuda")

-    @unittest.skipIf(
-        not TEST_CUDA_GRAPH, "CUDA >= 11.0 or ROCM >= 5.3 required for graphs"
-    )
+    @unittest.skipIf(not TEST_CUDA_GRAPH, "CUDA >= 11.0 or ROCM >= 5.3 required for graphs")
+    @skipIfRocm(msg="TODO: temp skip on ROCm 6.2")
     @serialTest()
     def test_repeat_graph_capture_cublas_workspace_memory(self):
         (x, y, z) = 1024, 512, 64

@@ -2842,6 +2841,7 @@ def forward(self, input_dict: dict):
     @unittest.skipIf(
         not TEST_CUDA_GRAPH, "CUDA >= 11.0 or ROCM >= 5.3 required for graphs"
     )
+    @skipIfRocm(msg="TODO: temp skip on ROCm 6.2")
     def test_graph_make_graphed_callables_same_pool(self):
         torch.manual_seed(5)
         torch.cuda.manual_seed(5)

test/test_fx.py

Lines changed: 2 additions & 0 deletions

@@ -57,6 +57,7 @@
     IS_WINDOWS,
     find_library_location,
     run_tests,
+    skipIfRocm,
     skipIfTorchDynamo,
 )
 from torch.testing._internal.jit_utils import JitTestCase

@@ -4183,6 +4184,7 @@ def test_class_member_back_compat(self):
             f"and subsequently --accept the change."
         raise AssertionError(msg) from e

+    @skipIfRocm(msg="TODO: flaky - https://github.com/pytorch/pytorch/issues/104012")
     def test_public_api_surface(self):
         non_back_compat_objects = {}

torch/testing/_internal/common_utils.py

Lines changed: 21 additions & 0 deletions

@@ -1279,6 +1279,14 @@ def printErrors(self) -> None:
 IS_X86 = platform.machine() in ('x86_64', 'i386')
 IS_ARM64 = platform.machine() in ('arm64', 'aarch64')

+def is_navi_arch():
+    if torch.cuda.is_available():
+        prop = torch.cuda.get_device_properties(0)
+        gfx_arch = prop.gcnArchName.split(":")[0]
+        if gfx_arch in ["gfx1100", "gfx1101", "gfx1102"]:
+            return True
+    return False
+
 def is_avx512_vnni_supported():
     if sys.platform != 'linux':
         return False

@@ -1754,6 +1762,19 @@ def wrapper(*args, **kwargs):
         return dec_fn(func)
     return dec_fn

+def skipIfRocmArch(arch: Tuple[str, ...]):
+    def dec_fn(fn):
+        @wraps(fn)
+        def wrap_fn(self, *args, **kwargs):
+            if TEST_WITH_ROCM:
+                prop = torch.cuda.get_device_properties(0)
+                if prop.gcnArchName.split(":")[0] in arch:
+                    reason = f"skipIfRocm: test skipped on {arch}"
+                    raise unittest.SkipTest(reason)
+            return fn(self, *args, **kwargs)
+        return wrap_fn
+    return dec_fn
+
 def runOnRocm(fn):
     @wraps(fn)
     def wrapper(*args, **kwargs):
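A usage sketch for the two helpers added above (hypothetical test file, not part of the commit): skipIfRocmArch for decorator-level arch skips, and is_navi_arch() for a runtime check inside a test body:

import unittest

import torch
from torch.testing._internal.common_utils import (
    is_navi_arch,
    run_tests,
    skipIfRocmArch,
    TestCase,
)

NAVI_ARCH = ("gfx1100", "gfx1101")  # as defined in the inductor test files above


class NaviSkipExamples(TestCase):
    @skipIfRocmArch(NAVI_ARCH)
    def test_decorator_skip(self):
        # Skipped only when TEST_WITH_ROCM is set and device 0 reports a
        # gcnArchName whose base matches an entry in NAVI_ARCH.
        pass

    def test_runtime_check(self):
        if is_navi_arch():
            raise unittest.SkipTest("not supported on Navi (gfx1100/gfx1101/gfx1102)")
        self.assertEqual(torch.ones(1).item(), 1)


if __name__ == "__main__":
    run_tests()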

torch/testing/_internal/distributed/distributed_test.py

Lines changed: 9 additions & 1 deletion

@@ -4863,7 +4863,11 @@ def _test_ddp_apply_optim_in_backward(
             # set_to_none for regular optimizer to match in backward
             # case.
             optim.zero_grad(set_to_none=True)
-
+
+    @skip_but_pass_in_sandcastle_if(
+        BACKEND == "gloo" and HAS_TORCHVISION,
+        "Failing with gloo backend + torchvision due to ongoing issue https://github.com/pytorch/pytorch/issues/111834",
+    )
     @skip_if_lt_x_gpu(2)
     def test_ddp_apply_optim_in_backward(self):
         for optim_cls, init_before in itertools.product(

@@ -4876,6 +4880,10 @@ def test_ddp_apply_optim_in_backward(self):
                 init_before=init_before,
             )

+    @skip_but_pass_in_sandcastle_if(
+        BACKEND == "gloo" and HAS_TORCHVISION,
+        "Failing with gloo backend + torchvision due to ongoing issue https://github.com/pytorch/pytorch/issues/111834",
+    )
     @skip_if_lt_x_gpu(2)
     def test_ddp_apply_optim_in_backward_grad_as_bucket_view_false(self):
         for init_before in [True, False]:
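Both new gates reference HAS_TORCHVISION, which is defined elsewhere in the file; conventionally it comes from an import probe, roughly as follows (an assumption based on the common pattern, not shown in this diff):

# Assumed definition of HAS_TORCHVISION (common import-probe pattern):
try:
    import torchvision  # noqa: F401

    HAS_TORCHVISION = True
except ImportError:
    HAS_TORCHVISION = False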
