Commit bc19f34

Merge branch 'master' into lr_scheduler_bugfix
2 parents: f596ab5 + 1d9c553

39 files changed (+301, -316 lines)

CHANGELOG.md

Lines changed: 14 additions & 8 deletions
@@ -18,16 +18,15 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Removed
 
+- Removed support for passing a bool value to `profiler` argument of Trainer ([#6164](https://github.com/PyTorchLightning/pytorch-lightning/pull/6164))
 
-### Fixed
 
-- Fixed incorrect yield logic for the amp autocast context manager ([#6080](https://github.com/PyTorchLightning/pytorch-lightning/pull/6080))
+- Removed deprecated Trainer argument `enable_pl_optimizer` and `automatic_optimization` ([#6163](https://github.com/PyTorchLightning/pytorch-lightning/pull/6163))
 
 
-- Made the `Plugin.reduce` method more consistent across all Plugins to reflect a mean-reduction by default ([#6011](https://github.com/PyTorchLightning/pytorch-lightning/pull/6011))
-
+### Fixed
 
-- Fixed priority of plugin/accelerator when setting distributed mode ([#6089](https://github.com/PyTorchLightning/pytorch-lightning/pull/6089))
+- Made the `Plugin.reduce` method more consistent across all Plugins to reflect a mean-reduction by default ([#6011](https://github.com/PyTorchLightning/pytorch-lightning/pull/6011))
 
 
 - Move lightning module to correct device type when using LightningDistributedWrapper ([#6070](https://github.com/PyTorchLightning/pytorch-lightning/pull/6070))
@@ -36,7 +35,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Do not print top-k verbose log with `ModelCheckpoint(monitor=None)` ([#6109](https://github.com/PyTorchLightning/pytorch-lightning/pull/6109))
 
 
-- Fixed error message for AMP + CPU incompatibility ([#6107](https://github.com/PyTorchLightning/pytorch-lightning/pull/6107))
+- Expose DeepSpeed loss parameters to allow users to fix loss instability ([#6115](https://github.com/PyTorchLightning/pytorch-lightning/pull/6115))
 
 
 - Expose DeepSpeed loss parameters to allow users to fix loss instability ([#6115](https://github.com/PyTorchLightning/pytorch-lightning/pull/6115))
@@ -45,6 +44,15 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed epoch level schedulers not being called when `val_check_interval < 1.0` ([#6075](https://github.com/PyTorchLightning/pytorch-lightning/pull/6075))
 
 
+## [1.2.1] - 2021-02-23
+
+### Fixed
+
+- Fixed incorrect yield logic for the amp autocast context manager ([#6080](https://github.com/PyTorchLightning/pytorch-lightning/pull/6080))
+- Fixed priority of plugin/accelerator when setting distributed mode ([#6089](https://github.com/PyTorchLightning/pytorch-lightning/pull/6089))
+- Fixed error message for AMP + CPU incompatibility ([#6107](https://github.com/PyTorchLightning/pytorch-lightning/pull/6107))
+
+
 ## [1.2.0] - 2021-02-18
 
 ### Added
@@ -91,10 +99,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Added `Trainer` flag to activate Stochastic Weight Averaging (SWA) `Trainer(stochastic_weight_avg=True)` ([#6038](https://github.com/PyTorchLightning/pytorch-lightning/pull/6038))
 - Added DeepSpeed integration ([#5954](https://github.com/PyTorchLightning/pytorch-lightning/pull/5954),
     [#6042](https://github.com/PyTorchLightning/pytorch-lightning/pull/6042))
-
 - Added a way to print to terminal without breaking up the progress bar ([#5470](https://github.com/PyTorchLightning/pytorch-lightning/pull/5470))
 
-
 ### Changed
 
 - Changed `stat_scores` metric now calculates stat scores over all classes and gains new parameters, in line with the new `StatScores` metric ([#4839](https://github.com/PyTorchLightning/pytorch-lightning/pull/4839))

docs/source/advanced/multi_gpu.rst

Lines changed: 4 additions & 4 deletions
@@ -690,9 +690,9 @@ DeepSpeed
 .. note::
     The DeepSpeed plugin is in beta and the API is subject to change. Please create an `issue <https://github.com/PyTorchLightning/pytorch-lightning/issues>`_ if you run into any issues.
 
-`DeepSpeed <https://github.com/microsoft/DeepSpeed>`_ offers additional CUDA deep learning training optimizations, similar to `FairScale <https://github.com/facebookresearch/fairscale>`_. DeepSpeed offers lower level training optimizations, and useful efficient optimizers such as `1-bit Adam <https://www.deepspeed.ai/tutorials/onebit-adam/>`_.
-Using the plugin, we were able to **train model sizes of 10 Billion parameters and above**, with a lot of useful information in this `benchmark <https://github.com/huggingface/transformers/issues/9996>`_ and the DeepSpeed `docs <https://www.deepspeed.ai/tutorials/megatron/>`_.
-We recommend using DeepSpeed in environments where speed and memory optimizations are important (such as training large billion parameter models). In addition, we recommend trying :ref:`sharded` first before trying DeepSpeed's further optimizations, primarily due to FairScale Sharded ease of use in scenarios such as multiple optimizers/schedulers.
+`DeepSpeed <https://github.com/microsoft/DeepSpeed>`_ is a deep learning training optimization library, providing the means to train massive billion parameter models at scale.
+Using the DeepSpeed plugin, we were able to **train model sizes of 10 Billion parameters and above**, with a lot of useful information in this `benchmark <https://github.com/huggingface/transformers/issues/9996>`_ and the DeepSpeed `docs <https://www.deepspeed.ai/tutorials/megatron/>`_.
+DeepSpeed also offers lower level training optimizations, and efficient optimizers such as `1-bit Adam <https://www.deepspeed.ai/tutorials/onebit-adam/>`_. We recommend using DeepSpeed in environments where speed and memory optimizations are important (such as training large billion parameter models).
 
 To use DeepSpeed, you first need to install DeepSpeed using the commands below.
 
@@ -706,7 +706,7 @@ Additionally if you run into any issues installing m4py, ensure you have openmpi
 .. note::
     Currently ``resume_from_checkpoint`` and manual optimization are not supported.
 
-    DeepSpeed only supports single optimizer, single scheduler.
+    DeepSpeed currently only supports single optimizer, single scheduler within the training loop.
 
 ZeRO-Offload
 """"""""""""

docs/source/common/optimizers.rst

Lines changed: 0 additions & 2 deletions
@@ -300,8 +300,6 @@ override the :meth:`optimizer_step` function.
 
 For example, here step optimizer A every 2 batches and optimizer B every 4 batches
 
-.. note:: When using Trainer(enable_pl_optimizer=True), there is no need to call `.zero_grad()`.
-
 .. testcode::
 
     def optimizer_zero_grad(self, current_epoch, batch_idx, optimizer, opt_idx):
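
The hunk ends at the ``optimizer_zero_grad`` signature, so the body is not visible here. A sketch of how the scheme described above ("optimizer A every 2 batches, optimizer B every 4") is usually completed, zeroing gradients on the same schedule as the steps; this is my completion under those assumptions, not the docs' exact body, and ``TwoOptimizerModel`` is a hypothetical class name:

    from pytorch_lightning import LightningModule

    class TwoOptimizerModel(LightningModule):  # hypothetical example module
        def optimizer_zero_grad(self, current_epoch, batch_idx, optimizer, opt_idx):
            # optimizer A (opt_idx 0) is stepped every 2 batches, so zero its grads on that cadence
            if opt_idx == 0 and batch_idx % 2 == 0:
                optimizer.zero_grad()
            # optimizer B (opt_idx 1) is stepped every 4 batches
            if opt_idx == 1 and batch_idx % 4 == 0:
                optimizer.zero_grad()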

docs/source/starter/new-project.rst

Lines changed: 1 addition & 1 deletion
@@ -737,7 +737,7 @@ Lightning has many tools for debugging. Here is an example of just a few of them
 .. testcode::
 
     # Profile your code to find speed/memory bottlenecks
-    Trainer(profiler=True)
+    Trainer(profiler="simple")
 
 ---------------

pytorch_lightning/core/lightning.py

Lines changed: 0 additions & 3 deletions
@@ -1324,9 +1324,6 @@ def optimizer_step(
         By default, Lightning calls ``step()`` and ``zero_grad()`` as shown in the example
         once per optimizer.
 
-        .. tip:: With ``Trainer(enable_pl_optimizer=True)``, you can use ``optimizer.step()`` directly
-            and it will handle zero_grad, accumulated gradients, AMP, TPU and more automatically for you.
-
         Warning:
             If you are overriding this method, make sure that you pass the ``optimizer_closure`` parameter
             to ``optimizer.step()`` function as shown in the examples. This ensures that
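
The warning kept in this docstring is about the closure argument. A minimal sketch of an override that respects it; the hook signature below follows the 1.2-era API as I recall it and should be checked against the full file, and ``MyModel`` is a hypothetical subclass:

    from pytorch_lightning import LightningModule

    class MyModel(LightningModule):  # hypothetical example module
        def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx,
                           optimizer_closure, on_tpu=False, using_native_amp=False, using_lbfgs=False):
            # the closure runs training_step and backward; it must be forwarded to step()
            optimizer.step(closure=optimizer_closure)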

pytorch_lightning/trainer/connectors/accelerator_connector.py

Lines changed: 16 additions & 0 deletions
@@ -517,6 +517,9 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
             rank_zero_warn('You are running on single node with no parallelization, so distributed has no effect.')
             self._distrib_type = None
 
+        # finished configuring self._distrib_type, check ipython environment
+        self.check_interactive_compatibility()
+
         # for DDP overwrite nb processes by requested GPUs
         if (
             self._device_type == DeviceType.GPU
@@ -558,6 +561,19 @@ def _set_horovod_backend(self):
         else:
             self.num_processes = hvd.local_size()
 
+    def check_interactive_compatibility(self):
+        """
+        Raises a `MisconfigurationException` if the accelerator and/or plugin
+        is not compatible with an interactive environment
+        """
+        from pytorch_lightning.utilities import _IS_INTERACTIVE
+        if _IS_INTERACTIVE and self._distrib_type is not None and not self._distrib_type.is_interactive_compatible():
+            raise MisconfigurationException(
+                f"Selected distributed backend {self._distrib_type} is not compatible with an interactive"
+                " environment. Run your code as a script, or choose one of the compatible backends:"
+                f" {', '.join(DistributedType.interactive_compatible_types())}"
+            )
+
     def check_horovod(self):
         """Raises a `MisconfigurationException` if the Trainer is not configured correctly for Horovod."""
         if not _HOROVOD_AVAILABLE:
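
In practice the new check means that, inside a Jupyter/IPython session, selecting a script-launched backend now fails while the Trainer is being configured. A rough sketch; the accelerator strings are the 1.2-era aliases and are my assumption, not something this diff shows:

    from pytorch_lightning import Trainer

    # in a notebook (_IS_INTERACTIVE is True) a non-spawn backend now raises
    # MisconfigurationException during Trainer construction:
    # Trainer(gpus=2, accelerator="ddp")

    # interactive-compatible choices keep working: "dp", "ddp_spawn", "ddp_sharded_spawn"
    trainer = Trainer(gpus=2, accelerator="ddp_spawn")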

pytorch_lightning/trainer/connectors/optimizer_connector.py

Lines changed: 1 addition & 6 deletions
@@ -20,12 +20,7 @@ class OptimizerConnector:
     def __init__(self, trainer):
         self.trainer = trainer
 
-    def on_trainer_init(self, enable_pl_optimizer):
-        if enable_pl_optimizer is not None:
-            rank_zero_warn(
-                "Trainer argument `enable_pl_optimizer` is deprecated in v1.1.3. It will be removed in v1.3.0",
-                DeprecationWarning
-            )
+    def on_trainer_init(self):
         self.trainer.lr_schedulers = []
         self.trainer.optimizers = []
         self.trainer.optimizer_frequencies = []

pytorch_lightning/trainer/connectors/profiler_connector.py

Lines changed: 9 additions & 15 deletions
@@ -21,35 +21,29 @@
     PyTorchProfiler,
     SimpleProfiler,
 )
-from pytorch_lightning.utilities import rank_zero_warn
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 
-PROFILERS = {"simple": SimpleProfiler, "advanced": AdvancedProfiler, "pytorch": PyTorchProfiler}
+PROFILERS = {
+    "simple": SimpleProfiler,
+    "advanced": AdvancedProfiler,
+    "pytorch": PyTorchProfiler,
+}
 
 
 class ProfilerConnector:
 
     def __init__(self, trainer):
         self.trainer = trainer
 
-    def on_trainer_init(self, profiler: Union[BaseProfiler, bool, str]):
+    def on_trainer_init(self, profiler: Union[BaseProfiler, str]):
 
-        if profiler and not isinstance(profiler, (bool, str, BaseProfiler)):
-            # TODO: Update exception on removal of bool
+        if profiler and not isinstance(profiler, (str, BaseProfiler)):
             raise MisconfigurationException(
-                "Only None, bool, str and subclasses of `BaseProfiler`"
+                "Only None, str and subclasses of `BaseProfiler`"
                 " are valid values for `Trainer`'s `profiler` parameter."
                 f" Received {profiler} which is of type {type(profiler)}."
             )
-
-        if isinstance(profiler, bool):
-            rank_zero_warn(
-                "Passing a bool value as a `profiler` argument to `Trainer` is deprecated"
-                " and will be removed in v1.3. Use str ('simple' or 'advanced') instead.", DeprecationWarning
-            )
-            if profiler:
-                profiler = SimpleProfiler()
-        elif isinstance(profiler, str):
+        if isinstance(profiler, str):
             if profiler.lower() in PROFILERS:
                 profiler_class = PROFILERS[profiler.lower()]
                 profiler = profiler_class()
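
With the bool path removed, ``profiler`` accepts a key of the PROFILERS mapping above, a BaseProfiler instance, or None. A short usage sketch; the ``pytorch_lightning.profiler`` import path is my assumption based on this file's own imports:

    from pytorch_lightning import Trainer
    from pytorch_lightning.profiler import AdvancedProfiler

    Trainer(profiler="simple")            # string key, mapped to SimpleProfiler via PROFILERS
    Trainer(profiler="pytorch")           # mapped to PyTorchProfiler
    Trainer(profiler=AdvancedProfiler())  # any BaseProfiler subclass instance
    # Trainer(profiler=True)              # now raises MisconfigurationException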

pytorch_lightning/trainer/trainer.py

Lines changed: 6 additions & 29 deletions
@@ -122,7 +122,7 @@ def __init__(
         num_sanity_val_steps: int = 2,
         truncated_bptt_steps: Optional[int] = None,
         resume_from_checkpoint: Optional[Union[Path, str]] = None,
-        profiler: Optional[Union[BaseProfiler, bool, str]] = None,
+        profiler: Optional[Union[BaseProfiler, str]] = None,
         benchmark: bool = False,
         deterministic: bool = False,
         reload_dataloaders_every_epoch: bool = False,
@@ -135,9 +135,7 @@ def __init__(
         amp_backend: str = 'native',
         amp_level: str = 'O2',
         distributed_backend: Optional[str] = None,
-        automatic_optimization: Optional[bool] = None,
         move_metrics_to_cpu: bool = False,
-        enable_pl_optimizer: bool = None,  # todo: remove in v1.3
         multiple_trainloader_mode: str = 'max_size_cycle',
         stochastic_weight_avg: bool = False
     ):
@@ -177,7 +175,7 @@ def __init__(
 
             checkpoint_callback: If ``True``, enable checkpointing.
                 It will configure a default ModelCheckpoint callback if there is no user-defined ModelCheckpoint in
-                :paramref:`~pytorch_lightning.trainer.trainer.Trainer.callbacks`. Default: ``True``.
+                :paramref:`~pytorch_lightning.trainer.trainer.Trainer.callbacks`.
 
                 .. warning:: Passing a ModelCheckpoint instance to this argument is deprecated since
                     v1.1 and will be unsupported from v1.3. Use `callbacks` argument instead.
@@ -213,10 +211,6 @@ def __init__(
 
             log_every_n_steps: How often to log within steps (defaults to every 50 steps).
 
-            automatic_optimization: If False you are responsible for calling .backward, .step, zero_grad
-                in LightningModule. This argument has been moved to LightningModule. It is deprecated
-                here in v1.1 and will be removed in v1.3.
-
             prepare_data_per_node: If True, each LOCAL_RANK=0 will call prepare data.
                 Otherwise only NODE_RANK=0, LOCAL_RANK=0 will prepare data
 
@@ -226,10 +220,9 @@ def __init__(
                 Ignored when a custom progress bar is passed to :paramref:`~Trainer.callbacks`. Default: None, means
                 a suitable value will be chosen based on the environment (terminal, Google COLAB, etc.).
 
-            profiler: To profile individual steps during training and assist in identifying bottlenecks. Passing bool
-                value is deprecated in v1.1 and will be removed in v1.3.
+            profiler: To profile individual steps during training and assist in identifying bottlenecks.
 
-            overfit_batches: Overfit a percent of training data (float) or a set number of batches (int). Default: 0.0
+            overfit_batches: Overfit a percent of training data (float) or a set number of batches (int).
 
             plugins: Plugins allow modification of core behavior like ddp and amp, and enable custom lightning plugins.
 
@@ -250,7 +243,7 @@ def __init__(
             num_processes: number of processes for distributed training with distributed_backend="ddp_cpu"
 
             num_sanity_val_steps: Sanity check runs n validation batches before starting the training routine.
-                Set it to `-1` to run all batches in all validation dataloaders. Default: 2
+                Set it to `-1` to run all batches in all validation dataloaders.
 
             reload_dataloaders_every_epoch: Set to True to reload dataloaders every epoch.
 
@@ -289,11 +282,6 @@ def __init__(
             move_metrics_to_cpu: Whether to force internal logged metrics to be moved to cpu.
                 This can save some gpu memory, but can make training slower. Use with attention.
 
-            enable_pl_optimizer: If True, each optimizer will be wrapped by
-                `pytorch_lightning.core.optimizer.LightningOptimizer`. It allows Lightning to
-                handle AMP, TPU, accumulated_gradients, etc.
-                .. warning:: Currently deprecated and it will be removed in v1.3
-
             multiple_trainloader_mode: How to loop over the datasets when there are multiple train loaders.
                 In 'max_size_cycle' mode, the trainer ends one epoch when the largest dataset is traversed,
                 and smaller datasets reload when running out of their data. In 'min_size' mode, all the datasets
@@ -346,7 +334,7 @@ def __init__(
         self.on_init_start()
 
         # init optimizer + lr scheduler related flags
-        self.optimizer_connector.on_trainer_init(enable_pl_optimizer)
+        self.optimizer_connector.on_trainer_init()
 
         # init data flags
         self.data_connector.on_trainer_init(
@@ -357,23 +345,12 @@ def __init__(
         self.training_tricks_connector.on_trainer_init(
             gradient_clip_val, track_grad_norm, accumulate_grad_batches, truncated_bptt_steps, terminate_on_nan
         )
-
-        # init train loop related flags
-        # TODO: remove in 1.3.0
-        if automatic_optimization is None:
-            automatic_optimization = True
-        else:
-            rank_zero_warn(
-                "Disable automatic optimization with the trainer flag is deprecated and will be removed in v1.3.0!"
-                "Please use the property on the LightningModule for disabling automatic optimization"
-            )
         self.train_loop.on_trainer_init(
             max_epochs,
             min_epochs,
             max_steps,
             min_steps,
             num_sanity_val_steps,
-            automatic_optimization,
             weights_summary,
         )
         self.evaluation_loop.on_trainer_init()
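
With ``automatic_optimization`` and ``enable_pl_optimizer`` removed from the Trainer, opting out of automatic optimization is done on the LightningModule itself, as the removed warning text suggests. A sketch of the replacement pattern; the ``optimizers()``/``manual_backward()`` calls follow the 1.2-era manual optimization API as I recall it, and ``ManualOptModel`` is a hypothetical example:

    import torch
    from pytorch_lightning import LightningModule

    class ManualOptModel(LightningModule):  # hypothetical example module
        def __init__(self):
            super().__init__()
            self.layer = torch.nn.Linear(32, 2)

        @property
        def automatic_optimization(self) -> bool:
            # replaces the removed Trainer(automatic_optimization=False) flag
            return False

        def training_step(self, batch, batch_idx):
            opt = self.optimizers()      # optimizers are always wrapped by Lightning now
            opt.zero_grad()
            loss = self.layer(batch).sum()
            self.manual_backward(loss)   # replaces loss.backward()
            opt.step()

        def configure_optimizers(self):
            return torch.optim.SGD(self.parameters(), lr=0.1)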

pytorch_lightning/trainer/training_loop.py

Lines changed: 0 additions & 2 deletions
@@ -58,7 +58,6 @@ def on_trainer_init(
         max_steps,
         min_steps,
         num_sanity_val_steps,
-        automatic_optimization,
         weights_summary,
     ):
         self.trainer.global_step = 0
@@ -71,7 +70,6 @@ def on_trainer_init(
         self.trainer.batch_idx = 0
         self.trainer.num_training_batches = 0
         self.trainer.train_dataloader = None
-        self.automatic_optimization = automatic_optimization
 
         # If neither max_epochs or max_steps is set, then use existing default of max_epochs = 1000
         self.trainer.max_epochs = 1000 if (max_epochs is None and max_steps is None) else max_epochs

pytorch_lightning/utilities/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """General utilities"""
-
 import numpy
 
 from pytorch_lightning.utilities.apply_func import move_data_to_device  # noqa: F401
@@ -33,6 +32,7 @@
     _HOROVOD_AVAILABLE,
     _HYDRA_AVAILABLE,
     _HYDRA_EXPERIMENTAL_AVAILABLE,
+    _IS_INTERACTIVE,
     _module_available,
     _NATIVE_AMP_AVAILABLE,
     _OMEGACONF_AVAILABLE,

pytorch_lightning/utilities/enums.py

Lines changed: 13 additions & 3 deletions
@@ -13,14 +13,14 @@
 # limitations under the License.
 """Enumerated utilities"""
 from enum import Enum
-from typing import Union
+from typing import List, Optional, Union
 
 
 class LightningEnum(str, Enum):
     """ Type of any enumerator with allowed comparison to string invariant to cases. """
 
     @classmethod
-    def from_str(cls, value: str) -> 'LightningEnum':
+    def from_str(cls, value: str) -> Optional['LightningEnum']:
         statuses = [status for status in dir(cls) if not status.startswith('_')]
         for st in statuses:
             if st.lower() == value.lower():
@@ -31,7 +31,7 @@ def __eq__(self, other: Union[str, Enum]) -> bool:
         other = other.value if isinstance(other, Enum) else str(other)
         return self.value.lower() == other.lower()
 
-    def __hash__(self):
+    def __hash__(self) -> int:
         # re-enable hashtable so it can be used as a dict key or in a set
         # example: set(LightningEnum)
         return hash(self.name)
@@ -58,6 +58,16 @@ class DistributedType(LightningEnum):
     >>> DistributedType.DDP2 in ('ddp2', )
     True
     """
+
+    @staticmethod
+    def interactive_compatible_types() -> List['DistributedType']:
+        """Returns a list containing interactive compatible DistributeTypes"""
+        return [DistributedType.DP, DistributedType.DDP_SPAWN, DistributedType.DDP_SHARDED_SPAWN]
+
+    def is_interactive_compatible(self) -> bool:
+        """Returns whether self is interactive compatible"""
+        return self in DistributedType.interactive_compatible_types()
+
     DP = 'dp'
     DDP = 'ddp'
     DDP2 = 'ddp2'
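
A quick sketch of how the new DistributedType helpers behave; the printed member values are assumed from the enum's naming convention, while the join mirrors what the accelerator connector's new error message does:

    from pytorch_lightning.utilities.enums import DistributedType

    assert DistributedType.DDP_SPAWN.is_interactive_compatible()
    assert not DistributedType.DDP.is_interactive_compatible()

    # members subclass str, so they join directly, as in the new error message
    print(", ".join(DistributedType.interactive_compatible_types()))  # e.g. dp, ddp_spawn, ddp_sharded_spawn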
