Commit d4dad19

awaelchli authored and lexierule committed
fix amp/apex misconfiguration error for cpu (Lightning-AI#6107)
* fix weird test
* fix apex plugin test
* fix raise
* cpu test
* fix type
* add changelog
1 parent 0455231 commit d4dad19

5 files changed: 38 additions & 97 deletions

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
@@ -15,11 +15,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed priority of plugin/accelerator when setting distributed mode ([#6089](https://github.com/PyTorchLightning/pytorch-lightning/pull/6089))
 
 
+- Fixed error message for AMP + CPU incompatibility ([#6107](https://github.com/PyTorchLightning/pytorch-lightning/pull/6107))
+
+
 ## [1.2.0] - 2021-02-18
 
 ### Added
 
-- Added `DataType`, `AverageMethod` and `MDMCAverageMethod` enum in metrics ([#5657](https://github.com/PyTorchLightning/pytorch-lightning/pull/5689)
+- Added `DataType`, `AverageMethod` and `MDMCAverageMethod` enum in metrics ([#5657](https://github.com/PyTorchLightning/pytorch-lightning/pull/5689))
 - Added support for summarized model total params size in megabytes ([#5590](https://github.com/PyTorchLightning/pytorch-lightning/pull/5590))
 - Added support for multiple train loaders ([#1959](https://github.com/PyTorchLightning/pytorch-lightning/pull/1959))
 - Added `Accuracy` metric now generalizes to Top-k accuracy for (multi-dimensional) multi-class inputs using the `top_k` parameter ([#4838](https://github.com/PyTorchLightning/pytorch-lightning/pull/4838))

pytorch_lightning/accelerators/cpu.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@ class CPUAccelerator(Accelerator):
 
     def setup(self, trainer, model):
         if isinstance(self.precision_plugin, MixedPrecisionPlugin):
-            MisconfigurationException("amp + cpu is not supported. Please use a GPU option")
+            raise MisconfigurationException("amp + cpu is not supported. Please use a GPU option")
 
         if "cpu" not in str(self.root_device):
             raise MisconfigurationException(f"Device should be CPU, got {self.root_device} instead")

tests/accelerators/test_cpu.py

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+from unittest.mock import Mock
+
+import pytest
+import torch
+
+from pytorch_lightning.accelerators import CPUAccelerator
+from pytorch_lightning.plugins import SingleDevicePlugin
+from pytorch_lightning.plugins.precision import MixedPrecisionPlugin
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
+
+
+def test_unsupported_precision_plugins():
+    """ Test error messages are raised for unsupported precision plugins with CPU. """
+    trainer = Mock()
+    model = Mock()
+    accelerator = CPUAccelerator(
+        training_type_plugin=SingleDevicePlugin(torch.device("cpu")),
+        precision_plugin=MixedPrecisionPlugin()
+    )
+    with pytest.raises(MisconfigurationException, match=r"amp \+ cpu is not supported."):
+        accelerator.setup(trainer=trainer, model=model)
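One detail worth knowing when reading the new test: the `match` argument of `pytest.raises` is interpreted as a regular expression (checked with `re.search` against the string form of the exception), which is why the `+` in the expected message is escaped as `\+`. A tiny standalone illustration, assuming only `pytest` is installed:

    import pytest


    def test_match_is_a_regex():
        # `+` is a regex metacharacter, so it must be escaped
        # to be matched literally in the error message.
        with pytest.raises(ValueError, match=r"amp \+ cpu"):
            raise ValueError("amp + cpu is not supported.")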

tests/plugins/test_amp_plugin.py

Lines changed: 4 additions & 63 deletions
@@ -5,10 +5,8 @@
 import torch
 
 from pytorch_lightning import Trainer
-from pytorch_lightning.callbacks import Callback
 from pytorch_lightning.plugins import NativeMixedPrecisionPlugin
 from pytorch_lightning.utilities import _NATIVE_AMP_AVAILABLE
-from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers.boring_model import BoringModel
 
 
@@ -25,78 +23,21 @@
 )
 @mock.patch('torch.cuda.device_count', return_value=2)
 @pytest.mark.parametrize(
-    ['ddp_backend', 'gpus', 'num_processes'],
-    [('ddp_cpu', None, 2), ('ddp', 2, 0), ('ddp2', 2, 0), ('ddp_spawn', 2, 0)],
+    ['ddp_backend', 'gpus'],
+    [('ddp', 2), ('ddp2', 2), ('ddp_spawn', 2)],
 )
-def on_fit_start(tmpdir, ddp_backend, gpus, num_processes):
-
-    class CB(Callback):
-
-        def on_fit_start(self, trainer, pl_module):
-            assert isinstance(trainer.precision_plugin, NativeMixedPrecisionPlugin)
-            raise SystemExit()
-
-    def train():
-        model = BoringModel()
-        trainer = Trainer(
-            fast_dev_run=True,
-            precision=16,
-            amp_backend='native',
-            gpus=gpus,
-            num_processes=num_processes,
-            accelerator=ddp_backend,
-            callbacks=[CB()],
-        )
-        trainer.fit(model)
-
-    if ddp_backend == "ddp_cpu":
-        with pytest.raises(MisconfigurationException, match="MP is only available on GPU"):
-            train()
-    else:
-        with pytest.raises(SystemExit):
-            train()
-
-
-@pytest.mark.skipif(not _NATIVE_AMP_AVAILABLE, reason="Minimal PT version is set to 1.6")
-@mock.patch.dict(
-    os.environ, {
-        "CUDA_VISIBLE_DEVICES": "0,1",
-        "SLURM_NTASKS": "2",
-        "SLURM_JOB_NAME": "SOME_NAME",
-        "SLURM_NODEID": "0",
-        "LOCAL_RANK": "0",
-        "SLURM_LOCALID": "0"
-    }
-)
-@mock.patch('torch.cuda.device_count', return_value=2)
-@pytest.mark.parametrize(
-    ['ddp_backend', 'gpus', 'num_processes'],
-    [('ddp_cpu', None, 2), ('ddp', 2, 0), ('ddp2', 2, 0), ('ddp_spawn', 2, 0)],
-)
-def test_amp_choice_custom_ddp_cpu(tmpdir, ddp_backend, gpus, num_processes):
+def test_amp_choice_custom_ddp_cpu(device_count_mock, ddp_backend, gpus):
 
     class MyNativeAMP(NativeMixedPrecisionPlugin):
         pass
 
-    class CB(Callback):
-
-        def on_fit_start(self, trainer, pl_module):
-            assert isinstance(trainer.precision_plugin, MyNativeAMP)
-            raise SystemExit()
-
-    model = BoringModel()
     trainer = Trainer(
-        fast_dev_run=True,
        precision=16,
         amp_backend='native',
-        num_processes=num_processes,
         accelerator=ddp_backend,
         plugins=[MyNativeAMP()],
-        callbacks=[CB()],
     )
-
-    with pytest.raises(SystemExit):
-        trainer.fit(model)
+    assert isinstance(trainer.precision_plugin, MyNativeAMP)
 
 
 class GradientUnscaleBoringModel(BoringModel):
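A detail of the rewritten test signatures that is easy to miss: when `@mock.patch` decorates a test function, it passes the created mock object in as an extra positional argument, which is what the new `device_count_mock` parameter receives (the apex tests below name it `mocked_device_count`). A minimal sketch of the mechanism, independent of Lightning:

    from unittest import mock

    import torch


    @mock.patch('torch.cuda.device_count', return_value=2)
    def check_two_gpus_reported(device_count_mock):
        # While this body runs, torch.cuda.device_count is replaced by a mock
        # that reports two devices; the mock itself is the first argument.
        assert torch.cuda.device_count() == 2
        device_count_mock.assert_called_once()


    check_two_gpus_reported()

The rewritten tests also no longer call `trainer.fit`: the Trainer resolves its precision plugin during construction, so asserting on `trainer.precision_plugin` right after building the Trainer is enough, and the `Callback`/`SystemExit` machinery could be deleted.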

tests/plugins/test_apex_plugin.py

Lines changed: 8 additions & 32 deletions
@@ -4,10 +4,8 @@
 import pytest
 
 from pytorch_lightning import Trainer
-from pytorch_lightning.callbacks import Callback
 from pytorch_lightning.plugins import ApexMixedPrecisionPlugin
 from pytorch_lightning.utilities import _APEX_AVAILABLE
-from tests.helpers.boring_model import BoringModel
 
 
 @pytest.mark.skipif(not _APEX_AVAILABLE, reason="test requires apex")
@@ -23,30 +21,19 @@
 )
 @mock.patch('torch.cuda.device_count', return_value=2)
 @pytest.mark.parametrize(
-    ['ddp_backend', 'gpus', 'num_processes'],
-    [('ddp_cpu', None, 2), ('ddp', 2, 0), ('ddp2', 2, 0), ('ddp_spawn', 2, 0)],
+    ['ddp_backend', 'gpus'],
+    [('ddp', 2), ('ddp2', 2), ('ddp_spawn', 2)],
 )
-def test_amp_choice_default_ddp_cpu(tmpdir, ddp_backend, gpus, num_processes):
+def test_amp_choice_default_ddp(mocked_device_count, ddp_backend, gpus):
 
-    class CB(Callback):
-
-        def on_fit_start(self, trainer, pl_module):
-            assert isinstance(trainer.precision_plugin, ApexMixedPrecisionPlugin)
-            raise SystemExit()
-
-    model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
         precision=16,
         amp_backend='apex',
         gpus=gpus,
-        num_processes=num_processes,
         accelerator=ddp_backend,
-        callbacks=[CB()],
     )
-
-    with pytest.raises(SystemExit):
-        trainer.fit(model)
+    assert isinstance(trainer.precision_plugin, ApexMixedPrecisionPlugin)
 
 
 @pytest.mark.skipif(not _APEX_AVAILABLE, reason="test requires apex")
@@ -62,31 +49,20 @@ def on_fit_start(self, trainer, pl_module):
 )
 @mock.patch('torch.cuda.device_count', return_value=2)
 @pytest.mark.parametrize(
-    ['ddp_backend', 'gpus', 'num_processes'],
-    [('ddp_cpu', None, 2), ('ddp', 2, 0), ('ddp2', 2, 0), ('ddp_spawn', 2, 0)],
+    ['ddp_backend', 'gpus'],
+    [('ddp', 2), ('ddp2', 2), ('ddp_spawn', 2)],
 )
-def test_amp_choice_custom_ddp_cpu(tmpdir, ddp_backend, gpus, num_processes):
+def test_amp_choice_custom_ddp(mocked_device_count, ddp_backend, gpus):
 
     class MyApexPlugin(ApexMixedPrecisionPlugin):
         pass
 
-    class CB(Callback):
-
-        def on_fit_start(self, trainer, pl_module):
-            assert isinstance(trainer.precision_plugin, MyApexPlugin)
-            raise SystemExit()
-
-    model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
         precision=16,
         amp_backend='apex',
         gpus=gpus,
-        num_processes=num_processes,
         accelerator=ddp_backend,
         plugins=[MyApexPlugin(amp_level="O2")],
-        callbacks=[CB()],
     )
-
-    with pytest.raises(SystemExit):
-        trainer.fit(model)
+    assert isinstance(trainer.precision_plugin, MyApexPlugin)
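For context on the custom plugin in the last test: `amp_level="O2"` refers to one of NVIDIA Apex's optimization levels (`O0` through `O3`); `O2` casts the model to FP16 while the optimizer keeps FP32 master weights for numerically stable updates. Outside Lightning, the plugin's behavior corresponds roughly to Apex's own API, sketched below under the assumption that Apex and a CUDA device are available:

    import torch
    from apex import amp  # requires NVIDIA Apex: https://github.com/NVIDIA/apex

    model = torch.nn.Linear(4, 4).cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

    # O2: model weights are cast to FP16, while the optimizer keeps
    # FP32 master copies of the parameters.
    model, optimizer = amp.initialize(model, optimizer, opt_level="O2")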
