
Commit eb81500

Refactor: skipif for multi - gpus 1/n (#6266)

* ngpus
* gpu
* isort
* pt
* flake8

1 parent dc8647e commit eb81500

Note: large commits hide some content by default, so only a subset of the changed files is shown below.

49 files changed: +215 -199 lines
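
The hunks below replace one-off @pytest.mark.skipif(...) conditions with a shared RunIf marker imported from tests/helpers/skipif.py (a rename of the earlier SkipIf helper, as the test_quantization.py hunks show). The helper's own definition is not among the hunks shown here, so the following is only a minimal sketch of how such a marker can be built on top of pytest.mark.skipif; the keyword names min_gpus and min_torch mirror the usages below, but the body is an illustrative assumption, not the repository's exact code.

# Illustrative sketch only -- the real tests/helpers/skipif.py may differ.
# RunIf folds several skip conditions into one decorator by returning a
# standard pytest.mark.skipif marker, so it composes with other pytest marks.
from typing import Optional

import pytest
import torch
from pkg_resources import parse_version


class RunIf:

    def __new__(cls, *args, min_gpus: int = 0, min_torch: Optional[str] = None, **kwargs):
        conditions = []
        reasons = []

        if min_gpus:
            # require at least `min_gpus` visible CUDA devices
            conditions.append(torch.cuda.device_count() < min_gpus)
            reasons.append(f"GPUs>={min_gpus}")

        if min_torch:
            # require a minimum PyTorch version
            conditions.append(parse_version(torch.__version__) < parse_version(min_torch))
            reasons.append(f"torch>={min_torch}")

        # keep only the reasons whose condition actually triggers the skip
        reasons = [reason for condition, reason in zip(conditions, reasons) if condition]
        return pytest.mark.skipif(
            *args, condition=any(conditions), reason=f"Requires: [{' + '.join(reasons)}]", **kwargs
        )

Because the call evaluates to an ordinary pytest.mark.skipif marker, @RunIf(min_gpus=1) can stand in for @pytest.mark.skipif(not torch.cuda.is_available(), ...) and @RunIf(min_gpus=2) for the torch.cuda.device_count() < 2 variant, and it stacks with @mock.patch and @pytest.mark.parametrize like any other mark. Extra keywords used below, such as quantization=True, would follow the same pattern with their own condition.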

tests/accelerators/test_accelerator_connector.py

Lines changed: 4 additions & 3 deletions

@@ -34,6 +34,7 @@
 from pytorch_lightning.plugins.environments import ClusterEnvironment, SLURMEnvironment, TorchElasticEnvironment
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers.boring_model import BoringModel
+from tests.helpers.skipif import RunIf
 
 
 def test_accelerator_choice_cpu(tmpdir):
@@ -119,7 +120,7 @@ def on_fit_start(self, trainer, pl_module):
         trainer.fit(model)
 
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU")
+@RunIf(min_gpus=1)
 @mock.patch.dict(
     os.environ, {
         "CUDA_VISIBLE_DEVICES": "0,1",
@@ -157,7 +158,7 @@ def on_fit_start(self, trainer, pl_module):
         trainer.fit(model)
 
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU")
+@RunIf(min_gpus=1)
 @mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1", "WORLD_SIZE": "2", "LOCAL_RANK": "10", "NODE_RANK": "0"})
 @mock.patch('torch.cuda.device_count', return_value=2)
 def test_accelerator_choice_ddp_te(device_count_mock):
@@ -185,7 +186,7 @@ def on_fit_start(self, trainer, pl_module):
         trainer.fit(model)
 
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU")
+@RunIf(min_gpus=1)
 @mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1", "WORLD_SIZE": "2", "LOCAL_RANK": "10", "NODE_RANK": "0"})
 @mock.patch('torch.cuda.device_count', return_value=2)
 def test_accelerator_choice_ddp2_te(device_count_mock):

tests/accelerators/test_ddp.py

Lines changed: 5 additions & 4 deletions

@@ -21,12 +21,13 @@
 from pytorch_lightning import Trainer
 from tests.accelerators import ddp_model, DDPLauncher
 from tests.helpers.boring_model import BoringModel
+from tests.helpers.skipif import RunIf
 from tests.utilities.distributed import call_training_script
 
 CLI_ARGS = '--max_epochs 1 --gpus 2 --accelerator ddp'
 
 
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@RunIf(min_gpus=2)
 def test_multi_gpu_model_ddp_fit_only(tmpdir):
     # call the script
     call_training_script(ddp_model, CLI_ARGS, 'fit', tmpdir, timeout=120)
@@ -39,7 +40,7 @@ def test_multi_gpu_model_ddp_fit_only(tmpdir):
     assert result['status'] == 'complete'
 
 
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@RunIf(min_gpus=2)
 def test_multi_gpu_model_ddp_test_only(tmpdir):
     # call the script
     call_training_script(ddp_model, CLI_ARGS, 'test', tmpdir)
@@ -52,7 +53,7 @@ def test_multi_gpu_model_ddp_test_only(tmpdir):
     assert result['status'] == 'complete'
 
 
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@RunIf(min_gpus=2)
 def test_multi_gpu_model_ddp_fit_test(tmpdir):
     # call the script
     call_training_script(ddp_model, CLI_ARGS, 'fit_test', tmpdir, timeout=20)
@@ -69,7 +70,7 @@ def test_multi_gpu_model_ddp_fit_test(tmpdir):
         assert out['test_acc'] > 0.7
 
 
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@RunIf(min_gpus=2)
 @DDPLauncher.run(
     "--max_epochs [max_epochs] --gpus 2 --accelerator [accelerator]",
     max_epochs=["1"],

tests/accelerators/test_ddp_spawn.py

Lines changed: 4 additions & 5 deletions

@@ -11,8 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import pytest
-import torch
 
 import tests.helpers.pipelines as tpipes
 import tests.helpers.utils as tutils
@@ -23,9 +21,10 @@
 from tests.helpers import BoringModel
 from tests.helpers.datamodules import ClassifDataModule
 from tests.helpers.simple_models import ClassificationModel
+from tests.helpers.skipif import RunIf
 
 
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@RunIf(min_gpus=2)
 def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
     tutils.set_random_master_port()
 
@@ -44,7 +43,7 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
     tpipes.run_model_test(trainer_options, model, dm)
 
 
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@RunIf(min_gpus=2)
 def test_multi_gpu_model_ddp_spawn(tmpdir):
     tutils.set_random_master_port()
 
@@ -66,7 +65,7 @@ def test_multi_gpu_model_ddp_spawn(tmpdir):
     memory.get_memory_profile('min_max')
 
 
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@RunIf(min_gpus=2)
 def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
     """Make sure DDP works with dataloaders passed to fit()"""
     tutils.set_random_master_port()

tests/accelerators/test_dp.py

Lines changed: 4 additions & 8 deletions

@@ -11,10 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import os
-from unittest import mock
-
-import pytest
 import torch
 import torch.nn.functional as F
 
@@ -26,6 +22,7 @@
 from tests.helpers import BoringModel
 from tests.helpers.datamodules import ClassifDataModule
 from tests.helpers.simple_models import ClassificationModel
+from tests.helpers.skipif import RunIf
 
 PRETEND_N_OF_GPUS = 16
 
@@ -55,7 +52,7 @@ def test_step_end(self, outputs):
         self.log('test_acc', self.test_acc(outputs['logits'], outputs['y']))
 
 
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@RunIf(min_gpus=2)
 def test_multi_gpu_early_stop_dp(tmpdir):
     """Make sure DDP works. with early stopping"""
     tutils.set_random_master_port()
@@ -76,7 +73,7 @@ def test_multi_gpu_early_stop_dp(tmpdir):
     tpipes.run_model_test(trainer_options, model, dm)
 
 
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@RunIf(min_gpus=2)
 def test_multi_gpu_model_dp(tmpdir):
     tutils.set_random_master_port()
 
@@ -98,8 +95,7 @@ def test_multi_gpu_model_dp(tmpdir):
     memory.get_memory_profile('min_max')
 
 
-@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"})
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@RunIf(min_gpus=2)
 def test_dp_test(tmpdir):
     tutils.set_random_master_port()
 

tests/callbacks/test_gpu_stats_monitor.py

Lines changed: 4 additions & 3 deletions

@@ -24,9 +24,10 @@
 from pytorch_lightning.trainer.states import TrainerState
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers import BoringModel
+from tests.helpers.skipif import RunIf
 
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
+@RunIf(min_gpus=1)
 def test_gpu_stats_monitor(tmpdir):
     """
     Test GPU stats are logged using a logger.
@@ -76,7 +77,7 @@ def test_gpu_stats_monitor_cpu_machine(tmpdir):
         GPUStatsMonitor()
 
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
+@RunIf(min_gpus=1)
 def test_gpu_stats_monitor_no_logger(tmpdir):
     """
     Test GPUStatsMonitor with no logger in Trainer.
@@ -96,7 +97,7 @@ def test_gpu_stats_monitor_no_logger(tmpdir):
     trainer.fit(model)
 
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
+@RunIf(min_gpus=1)
 def test_gpu_stats_monitor_no_gpu_warning(tmpdir):
     """
     Test GPUStatsMonitor raises a warning when not training on GPU device.

tests/callbacks/test_pruning.py

Lines changed: 2 additions & 1 deletion

@@ -27,6 +27,7 @@
 from pytorch_lightning.callbacks import ModelPruning
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers import BoringModel
+from tests.helpers.skipif import RunIf
 
 
 class TestModel(BoringModel):
@@ -168,7 +169,7 @@ def test_pruning_callback_ddp(tmpdir, use_global_unstructured, parameters_to_pru
     )
 
 
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@RunIf(min_gpus=2)
 @pytest.mark.skipif(platform.system() == "Windows", reason="Distributed training is not supported on Windows")
 def test_pruning_callback_ddp_spawn(tmpdir):
     train_with_pruning_callback(tmpdir, use_global_unstructured=True, accelerator="ddp_spawn", gpus=2)

tests/callbacks/test_quantization.py

Lines changed: 6 additions & 9 deletions

@@ -22,15 +22,12 @@
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers.datamodules import RegressDataModule
 from tests.helpers.simple_models import RegressionModel
-from tests.helpers.skipif import SkipIf
+from tests.helpers.skipif import RunIf
 
 
-@pytest.mark.parametrize(
-    "observe",
-    ['average', pytest.param('histogram', marks=SkipIf(min_torch="1.5"))]
-)
+@pytest.mark.parametrize("observe", ['average', pytest.param('histogram', marks=RunIf(min_torch="1.5"))])
 @pytest.mark.parametrize("fuse", [True, False])
-@SkipIf(quantization=True)
+@RunIf(quantization=True)
 def test_quantization(tmpdir, observe, fuse):
     """Parity test for quant model"""
     seed_everything(42)
@@ -65,7 +62,7 @@ def test_quantization(tmpdir, observe, fuse):
     assert torch.allclose(org_score, quant_score, atol=0.45)
 
 
-@SkipIf(quantization=True)
+@RunIf(quantization=True)
 def test_quantize_torchscript(tmpdir):
     """Test converting to torchscipt """
     dm = RegressDataModule()
@@ -81,7 +78,7 @@ def test_quantize_torchscript(tmpdir):
     tsmodel(tsmodel.quant(batch[0]))
 
 
-@SkipIf(quantization=True)
+@RunIf(quantization=True)
 def test_quantization_exceptions(tmpdir):
     """Test wrong fuse layers"""
     with pytest.raises(MisconfigurationException, match='Unsupported qconfig'):
@@ -124,7 +121,7 @@ def custom_trigger_last(trainer):
         (custom_trigger_last, 2),
     ]
 )
-@SkipIf(quantization=True)
+@RunIf(quantization=True)
 def test_quantization_triggers(tmpdir, trigger_fn, expected_count):
     """Test how many times the quant is called"""
     dm = RegressDataModule()
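
Because RunIf(...) evaluates to an ordinary pytest marker, it can gate a single parametrize case through pytest.param(..., marks=...) as well as a whole test, which the histogram case above relies on. A hypothetical usage sketch (test name and assertion invented for illustration; the real tests above gate on quantization=True):

import pytest

@pytest.mark.parametrize("observe", ['average', pytest.param('histogram', marks=RunIf(min_torch="1.5"))])
@RunIf(min_gpus=1)  # stand-in condition for this sketch
def test_observer_modes(observe):
    # the 'histogram' case is collected but skipped on torch < 1.5
    assert observe in ('average', 'histogram')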

tests/callbacks/test_stochastic_weight_avg.py

Lines changed: 8 additions & 10 deletions

@@ -24,6 +24,7 @@
 from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_6
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers import BoringModel, RandomDataset
+from tests.helpers.skipif import RunIf
 
 if _TORCH_GREATER_EQUAL_1_6:
     from pytorch_lightning.callbacks import StochasticWeightAveraging
@@ -114,40 +115,37 @@ def train_with_swa(tmpdir, batchnorm=True, accelerator=None, gpus=None, num_proc
     assert trainer.lightning_module == model
 
 
-@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_6, reason="SWA available from PyTorch 1.6.0")
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@RunIf(min_gpus=2, min_torch="1.6.0")
 @pytest.mark.skipif(
     not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1', reason="test should be run outside of pytest"
 )
 def test_swa_callback_ddp(tmpdir):
     train_with_swa(tmpdir, accelerator="ddp", gpus=2)
 
 
-@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_6, reason="SWA available from PyTorch 1.6.0")
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@RunIf(min_gpus=2, min_torch="1.6.0")
 def test_swa_callback_ddp_spawn(tmpdir):
     train_with_swa(tmpdir, accelerator="ddp_spawn", gpus=2)
 
 
-@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_6, reason="SWA available from PyTorch 1.6.0")
+@RunIf(min_torch="1.6.0")
 @pytest.mark.skipif(platform.system() == "Windows", reason="ddp_cpu is not available on Windows")
 def test_swa_callback_ddp_cpu(tmpdir):
     train_with_swa(tmpdir, accelerator="ddp_cpu", num_processes=2)
 
 
-@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_6, reason="SWA available from PyTorch 1.6.0")
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires a GPU machine")
+@RunIf(min_gpus=1, min_torch="1.6.0")
 def test_swa_callback_1_gpu(tmpdir):
     train_with_swa(tmpdir, gpus=1)
 
 
-@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_6, reason="SWA available from PyTorch 1.6.0")
+@RunIf(min_torch="1.6.0")
 @pytest.mark.parametrize("batchnorm", (True, False))
 def test_swa_callback(tmpdir, batchnorm):
     train_with_swa(tmpdir, batchnorm=batchnorm)
 
 
-@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_6, reason="SWA available from PyTorch 1.6.0")
+@RunIf(min_torch="1.6.0")
 def test_swa_raises():
     with pytest.raises(MisconfigurationException, match=">0 integer or a float between 0 and 1"):
         StochasticWeightAveraging(swa_epoch_start=0, swa_lrs=0.1)
@@ -161,7 +159,7 @@ def test_swa_raises():
 
 @pytest.mark.parametrize('stochastic_weight_avg', [False, True])
 @pytest.mark.parametrize('use_callbacks', [False, True])
-@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_6, reason="SWA available from PyTorch 1.6.0")
+@RunIf(min_torch="1.6.0")
 def test_trainer_and_stochastic_weight_avg(tmpdir, use_callbacks, stochastic_weight_avg):
     """Test to ensure SWA Callback is injected when `stochastic_weight_avg` is provided to the Trainer"""
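
Note how pairs of stacked skipif markers above collapse into a single decorator: under the sketch near the top of this page, @RunIf(min_gpus=2, min_torch="1.6.0") OR-s its conditions with any(...), so any unmet requirement triggers the skip. A hypothetical example:

@RunIf(min_gpus=2, min_torch="1.6.0")  # skipped on <2 GPUs or on torch < 1.6.0
def test_needs_two_gpus_and_recent_torch():
    ...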

tests/checkpointing/test_torch_saving.py

Lines changed: 2 additions & 1 deletion

@@ -19,6 +19,7 @@
 
 from pytorch_lightning import Trainer
 from tests.helpers import BoringModel
+from tests.helpers.skipif import RunIf
 
 
 def test_model_torch_save(tmpdir):
@@ -57,7 +58,7 @@ def test_model_torch_save_ddp_cpu(tmpdir):
     torch.save(trainer, temp_path)
 
 
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@RunIf(min_gpus=2)
 def test_model_torch_save_ddp_cuda(tmpdir):
     """Test to ensure torch save does not fail for model and trainer using gpu ddp."""
     model = BoringModel()

tests/core/test_datamodules.py

Lines changed: 4 additions & 4 deletions

@@ -17,7 +17,6 @@
 from unittest import mock
 from unittest.mock import PropertyMock
 
-import pytest
 import torch
 import torch.nn.functional as F
 
@@ -28,6 +27,7 @@
 from tests.helpers import BoringDataModule, BoringModel
 from tests.helpers.datamodules import ClassifDataModule
 from tests.helpers.simple_models import ClassificationModel
+from tests.helpers.skipif import RunIf
 from tests.helpers.utils import reset_seed, set_random_master_port
 
 
@@ -348,7 +348,7 @@ def test_trainer_attached_to_dm(tmpdir):
     assert dm.trainer is not None
 
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
+@RunIf(min_gpus=1)
 def test_full_loop_single_gpu(tmpdir):
     reset_seed()
 
@@ -373,7 +373,7 @@ def test_full_loop_single_gpu(tmpdir):
     assert result[0]['test_acc'] > 0.6
 
 
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@RunIf(min_gpus=2)
 def test_full_loop_dp(tmpdir):
     set_random_master_port()
 
@@ -420,7 +420,7 @@ def test_step_end(self, outputs):
     assert result[0]['test_acc'] > 0.6
 
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
+@RunIf(min_gpus=1)
 @mock.patch("pytorch_lightning.accelerators.accelerator.Accelerator.lightning_module", new_callable=PropertyMock)
 def test_dm_apply_batch_transfer_handler(get_module_mock):
     expected_device = torch.device('cuda', 0)

tests/core/test_decorators.py

Lines changed: 2 additions & 1 deletion

@@ -16,9 +16,10 @@
 
 from pytorch_lightning.core.decorators import auto_move_data
 from tests.helpers import BoringModel
+from tests.helpers.skipif import RunIf
 
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
+@RunIf(min_gpus=1)
 @pytest.mark.parametrize(['src_device', 'dest_device'], [
     pytest.param(torch.device('cpu'), torch.device('cpu')),
     pytest.param(torch.device('cpu', 0), torch.device('cuda', 0)),
