minor usability enh bundle workflow (#5480)

wyli · Nic-Ma · web-flow · commit d51edc3ddf84 · 2022-11-16T15:31:48.000Z
Signed-off-by: Wenqi Li <wenqil@nvidia.com> ### Description this PR includes a few minor improvements - allow string input, such as `device="cuda:1"` for Workflow - skip stats key metric if `key_metric_name` is None (monai/handlers/stats_handler.py) - print incompatible metric values in the warning message (CheckpointSaver) - adds an `epochs` parameter for `ShuffleBuffer` (#5488) - adds an `output_name_formatter` for `SaveImage` (#5508) - simplify torch version in bundle init (#5529) ### Types of changes  - [x] Non-breaking change (fix or new feature that would not break existing functionality). - [ ] Breaking change (fix or new feature that would cause existing functionality to change). - [x] New tests added to cover the changes. - [ ] Integration tests passed locally by running `./runtests.sh -f -u --net --coverage`. - [x] Quick tests passed locally by running `./runtests.sh --quick --unittests --disttests`. - [ ] In-line docstrings updated. - [ ] Documentation updated, tested `make html` command in the `docs/` folder. Signed-off-by: Wenqi Li <wenqil@nvidia.com> Signed-off-by: Nic Ma <nma@nvidia.com> Co-authored-by: Nic Ma <nma@nvidia.com>
diff --git a/docs/source/modules.md b/docs/source/modules.md
@@ -264,7 +264,7 @@ A typical bundle example can include:
      ┗━ *license.txt
 ```
 Details about the bundle config definition and syntax & examples are at [config syntax](https://docs.monai.io/en/latest/config_syntax.html).
-A step-by-step [get started](https://github.com/Project-MONAI/tutorials/blob/master/modules/bundles/get_started.ipynb) tutorial notebook can help users quickly set up a bundle. [[bundle examples](https://github.com/Project-MONAI/tutorials/tree/main/bundle), [model-zoo](https://github.com/Project-MONAI/model-zoo)]
+A step-by-step [get started](https://github.com/Project-MONAI/tutorials/blob/master/bundle/get_started.md) tutorial notebook can help users quickly set up a bundle. [[bundle examples](https://github.com/Project-MONAI/tutorials/tree/main/bundle), [model-zoo](https://github.com/Project-MONAI/model-zoo)]
 
 ## Federated Learning
 
diff --git a/monai/bundle/utils.py b/monai/bundle/utils.py
@@ -32,7 +32,7 @@
     "version": "0.0.1",
     "changelog": {"0.0.1": "Initial version"},
     "monai_version": _conf_values["MONAI"],
-    "pytorch_version": _conf_values["Pytorch"],
+    "pytorch_version": str(_conf_values["Pytorch"]).split("+")[0].split("a")[0],  # 1.9.0a0+df837d0 or 1.13.0+cu117
     "numpy_version": _conf_values["Numpy"],
     "optional_packages_version": {},
     "task": "Describe what the network predicts",
diff --git a/monai/data/dataset.py b/monai/data/dataset.py
@@ -785,6 +785,7 @@ def __init__(
                 2. to execute `runtime cache` on GPU memory, must co-work with
                 `monai.data.DataLoader`, and can't work with `monai.data.DistributedSampler`
                 as GPU Tensor usually can't be shared in the multiprocessing context.
+                (try ``cache_dataset.disable_share_memory_cache()`` in case of GPU caching issues.)
 
         """
         if not isinstance(transform, Compose):
diff --git a/monai/data/folder_layout.py b/monai/data/folder_layout.py
@@ -9,10 +9,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import monai
 from monai.config import PathLike
 from monai.data.utils import create_file_basename
 
-__all__ = ["FolderLayout"]
+__all__ = ["FolderLayout", "default_name_formatter"]
+
+
+def default_name_formatter(metadict, saver):
+    """Returns a kwargs dict for :py:meth:`FolderLayout.filename`,
+    according to the input metadata and SaveImage transform."""
+    subject = metadict[monai.utils.ImageMetaKey.FILENAME_OR_OBJ] if metadict else getattr(saver, "_data_index", 0)
+    patch_index = metadict.get(monai.utils.ImageMetaKey.PATCH_INDEX, None) if metadict else None
+    return {"subject": f"{subject}", "idx": patch_index}
 
 
 class FolderLayout:
diff --git a/monai/data/iterable_dataset.py b/monai/data/iterable_dataset.py
@@ -71,6 +71,7 @@ class ShuffleBuffer(Randomizable, IterableDataset):
         seed: random seed to initialize the random state of all workers, set `seed += 1` in
             every iter() call, refer to the PyTorch idea:
             https://github.com/pytorch/pytorch/blob/v1.10.0/torch/utils/data/distributed.py#L98.
+        epochs: number of epochs to iterate over the dataset, default to 1, -1 means infinite epochs.
 
     Note:
         Both ``monai.data.DataLoader`` and ``torch.utils.data.DataLoader`` do not seed this class (as a subclass of
@@ -93,10 +94,11 @@ def run():
 
     """
 
-    def __init__(self, data, transform=None, buffer_size: int = 512, seed: int = 0) -> None:
+    def __init__(self, data, transform=None, buffer_size: int = 512, seed: int = 0, epochs: int = 1) -> None:
         super().__init__(data=data, transform=transform)
         self.size = buffer_size
         self.seed = seed
+        self.epochs = epochs
         self._idx = 0
 
     def randomized_pop(self, buffer):
@@ -123,7 +125,8 @@ def __iter__(self):
         """
         self.seed += 1
         super().set_random_state(seed=self.seed)  # make all workers in sync
-        yield from IterableDataset(self.generate_item(), transform=self.transform)
+        for _ in range(self.epochs) if self.epochs >= 0 else iter(int, 1):
+            yield from IterableDataset(self.generate_item(), transform=self.transform)
 
     def randomize(self, size: int) -> None:
         self._idx = self.R.randint(size)
diff --git a/monai/engines/evaluator.py b/monai/engines/evaluator.py
@@ -86,7 +86,7 @@ class Evaluator(Workflow):
 
     def __init__(
         self,
-        device: torch.device,
+        device: torch.device | str,
         val_data_loader: Iterable | DataLoader,
         epoch_length: int | None = None,
         non_blocking: bool = False,
diff --git a/monai/engines/multi_gpu_supervised_trainer.py b/monai/engines/multi_gpu_supervised_trainer.py
@@ -9,7 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import TYPE_CHECKING, Callable, Dict, Optional, Sequence, Tuple
+from typing import TYPE_CHECKING, Callable, Dict, Optional, Sequence, Tuple, Union
 
 import torch
 import torch.nn
@@ -55,7 +55,7 @@ def create_multigpu_supervised_trainer(
     net: torch.nn.Module,
     optimizer: Optimizer,
     loss_fn: Callable,
-    devices: Optional[Sequence[torch.device]] = None,
+    devices: Optional[Sequence[Union[str, torch.device]]] = None,
     non_blocking: bool = False,
     prepare_batch: Callable = _prepare_batch,
     output_transform: Callable = _default_transform,
@@ -105,7 +105,7 @@ def create_multigpu_supervised_trainer(
 def create_multigpu_supervised_evaluator(
     net: torch.nn.Module,
     metrics: Optional[Dict[str, Metric]] = None,
-    devices: Optional[Sequence[torch.device]] = None,
+    devices: Optional[Sequence[Union[str, torch.device]]] = None,
     non_blocking: bool = False,
     prepare_batch: Callable = _prepare_batch,
     output_transform: Callable = _default_eval_transform,
diff --git a/monai/engines/trainer.py b/monai/engines/trainer.py
@@ -134,7 +134,7 @@ class SupervisedTrainer(Trainer):
 
     def __init__(
         self,
-        device: torch.device,
+        device: str | torch.device,
         max_epochs: int,
         train_data_loader: Iterable | DataLoader,
         network: torch.nn.Module,
@@ -304,7 +304,7 @@ class GanTrainer(Trainer):
 
     def __init__(
         self,
-        device: torch.device,
+        device: str | torch.device,
         max_epochs: int,
         train_data_loader: DataLoader,
         g_network: torch.nn.Module,
diff --git a/monai/engines/utils.py b/monai/engines/utils.py
@@ -60,7 +60,7 @@ class IterationEvents(EventEnum):
     INNER_ITERATION_COMPLETED = "inner_iteration_completed"
 
 
-def get_devices_spec(devices: Optional[Sequence[torch.device]] = None) -> List[torch.device]:
+def get_devices_spec(devices: Optional[Sequence[Union[torch.device, str]]] = None) -> List[torch.device]:
     """
     Get a valid specification for one or more devices. If `devices` is None get devices for all CUDA devices available.
     If `devices` is a zero-length structure, a single CPU compute device is returned. In any other cases `devices` is
@@ -88,7 +88,8 @@ def get_devices_spec(devices: Optional[Sequence[torch.device]] = None) -> List[t
     else:
         devices = list(devices)
 
-    return devices
+    devices = [torch.device(d) if isinstance(d, str) else d for d in devices]
+    return devices  # type: ignore
 
 
 def default_prepare_batch(
diff --git a/monai/engines/workflow.py b/monai/engines/workflow.py
@@ -93,7 +93,6 @@ class Workflow(IgniteEngine):  # type: ignore[valid-type, misc] # due to optiona
             https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
 
     Raises:
-        TypeError: When ``device`` is not a ``torch.Device``.
         TypeError: When ``data_loader`` is not a ``torch.utils.data.DataLoader``.
         TypeError: When ``key_metric`` is not a ``Optional[dict]``.
         TypeError: When ``additional_metrics`` is not a ``Optional[dict]``.
@@ -102,7 +101,7 @@ class Workflow(IgniteEngine):  # type: ignore[valid-type, misc] # due to optiona
 
     def __init__(
         self,
-        device: torch.device,
+        device: Union[torch.device, str],
         max_epochs: int,
         data_loader: Union[Iterable, DataLoader],
         epoch_length: Optional[int] = None,
@@ -125,8 +124,6 @@ def __init__(
             super().__init__(iteration_update)
         else:
             super().__init__(self._iteration)
-        if not isinstance(device, torch.device):
-            raise TypeError(f"Device must be a torch.device but is {type(device).__name__}.")
 
         if isinstance(data_loader, DataLoader):
             sampler = data_loader.__dict__["sampler"]
@@ -155,7 +152,7 @@ def set_sampler_epoch(engine: Engine):
             metrics={},
             metric_details={},
             dataloader=None,
-            device=device,
+            device=device if isinstance(device, torch.device) or device is None else torch.device(device),
             key_metric_name=None,  # we can set many metrics, only use key_metric to compare and save the best model
             best_metric=-1,
             best_metric_epoch=-1,
diff --git a/monai/handlers/checkpoint_saver.py b/monai/handlers/checkpoint_saver.py
@@ -165,6 +165,7 @@ def _score_func(engine: Engine):
                     warnings.warn(
                         "key metric is not a scalar value, skip metric comparison and don't save a model."
                         "please use other metrics as key metric, or change the `reduction` mode to 'mean'."
+                        f"got metric: {metric_name}={metric}."
                     )
                     return -1
                 return (-1 if key_metric_negative_sign else 1) * metric
diff --git a/monai/handlers/stats_handler.py b/monai/handlers/stats_handler.py
@@ -204,6 +204,7 @@ def _default_epoch_print(self, engine: Engine) -> None:
             hasattr(engine.state, "key_metric_name")
             and hasattr(engine.state, "best_metric")
             and hasattr(engine.state, "best_metric_epoch")
+            and engine.state.key_metric_name is not None  # type: ignore
         ):
             out_str = f"Key metric: {engine.state.key_metric_name} "
             out_str += f"best value: {engine.state.best_metric} "
diff --git a/monai/transforms/io/array.py b/monai/transforms/io/array.py
@@ -27,7 +27,7 @@
 
 from monai.config import DtypeLike, NdarrayOrTensor, PathLike
 from monai.data import image_writer
-from monai.data.folder_layout import FolderLayout
+from monai.data.folder_layout import FolderLayout, default_name_formatter
 from monai.data.image_reader import (
     ImageReader,
     ITKReader,
@@ -340,6 +340,8 @@ class SaveImage(Transform):
             the supported built-in writer classes are ``"NibabelWriter"``, ``"ITKWriter"``, ``"PILWriter"``.
         channel_dim: the index of the channel dimension. Default to `0`.
             `None` to indicate no channel dimension.
+        output_name_formatter: a callable function (returning a kwargs dict) to format the output file name.
+            see also: :py:func:`monai.data.folder_layout.default_name_formatter`.
     """
 
     def __init__(
@@ -360,6 +362,7 @@ def __init__(
         output_format: str = "",
         writer: Union[Type[image_writer.ImageWriter], str, None] = None,
         channel_dim: Optional[int] = 0,
+        output_name_formatter=None,
     ) -> None:
         self.folder_layout = FolderLayout(
             output_dir=output_dir,
@@ -390,6 +393,7 @@ def __init__(
         self.data_kwargs = {"squeeze_end_dims": squeeze_end_dims, "channel_dim": channel_dim}
         self.meta_kwargs = {"resample": resample, "mode": mode, "padding_mode": padding_mode, "dtype": dtype}
         self.write_kwargs = {"verbose": print_log}
+        self.fname_formatter = default_name_formatter if output_name_formatter is None else output_name_formatter
         self._data_index = 0
 
     def set_options(self, init_kwargs=None, data_kwargs=None, meta_kwargs=None, write_kwargs=None):
@@ -420,9 +424,8 @@ def __call__(self, img: Union[torch.Tensor, np.ndarray], meta_data: Optional[Dic
             meta_data: key-value pairs of metadata corresponding to the data.
         """
         meta_data = img.meta if isinstance(img, MetaTensor) else meta_data
-        subject = meta_data[Key.FILENAME_OR_OBJ] if meta_data else str(self._data_index)
-        patch_index = meta_data.get(Key.PATCH_INDEX, None) if meta_data else None
-        filename = self.folder_layout.filename(subject=f"{subject}", idx=patch_index)
+        kw = self.fname_formatter(meta_data, self)
+        filename = self.folder_layout.filename(**kw)
         if meta_data and len(ensure_tuple(meta_data.get("spatial_shape", ()))) == len(img.shape):
             self.data_kwargs["channel_dim"] = None
 
diff --git a/monai/transforms/io/dictionary.py b/monai/transforms/io/dictionary.py
@@ -233,6 +233,8 @@ class SaveImaged(MapTransform):
             if `None`, use the default writer from `monai.data.image_writer` according to `output_ext`.
             if it's a string, it's treated as a class name or dotted path;
             the supported built-in writer classes are ``"NibabelWriter"``, ``"ITKWriter"``, ``"PILWriter"``.
+        output_name_formatter: a callable function (returning a kwargs dict) to format the output file name.
+            see also: :py:func:`monai.data.folder_layout.default_name_formatter`.
 
     """
 
@@ -257,6 +259,7 @@ def __init__(
         print_log: bool = True,
         output_format: str = "",
         writer: Union[Type[image_writer.ImageWriter], str, None] = None,
+        output_name_formatter=None,
     ) -> None:
         super().__init__(keys, allow_missing_keys)
         self.meta_keys = ensure_tuple_rep(meta_keys, len(self.keys))
@@ -277,6 +280,7 @@ def __init__(
             print_log=print_log,
             output_format=output_format,
             writer=writer,
+            output_name_formatter=output_name_formatter,
         )
 
     def set_options(self, init_kwargs=None, data_kwargs=None, meta_kwargs=None, write_kwargs=None):
diff --git a/tests/test_deepedit_interaction.py b/tests/test_deepedit_interaction.py
@@ -92,7 +92,7 @@ def run_interaction(self, train):
 
         # set up engine
         engine = SupervisedTrainer(
-            device=torch.device("cpu"),
+            device="cpu",
             max_epochs=1,
             train_data_loader=data_loader,
             network=network,
diff --git a/tests/test_save_image.py b/tests/test_save_image.py
@@ -49,6 +49,7 @@ def test_saved_content(self, test_data, meta_data, output_ext, resample):
                 output_ext=output_ext,
                 resample=resample,
                 separate_folder=False,  # test saving into the same folder
+                output_name_formatter=lambda x, xform: dict(subject=x["filename_or_obj"] if x else "0"),
             )
             trans(test_data)
 
diff --git a/tests/test_shuffle_buffer.py b/tests/test_shuffle_buffer.py
@@ -37,6 +37,11 @@ def test_shape(self):
             np.testing.assert_allclose(output, [[2, 3], [1, 4]], err_msg=f"seed {buffer.seed}")
             np.testing.assert_allclose(output2, [[1, 4], [2, 3]], err_msg=f"seed {buffer.seed}")
 
+    def test_epochs(self):
+        buffer = ShuffleBuffer([1, 2, 3, 4], seed=0, epochs=2)
+        output = [convert_data_type(x, np.ndarray)[0] for x in DataLoader(dataset=buffer, batch_size=2)]
+        np.testing.assert_allclose(output, [[2, 1], [3, 4], [4, 2], [3, 1]])
+
 
 if __name__ == "__main__":
     unittest.main()

Original file line number	Diff line number	Diff line change
`@@ -165,6 +165,7 @@ def _score_func(engine: Engine):`
`165`	`165`	`warnings.warn(`
`166`	`166`	`"key metric is not a scalar value, skip metric comparison and don't save a model."`
`167`	`167`	"please use other metrics as key metric, or change the `reduction` mode to 'mean'."
	`168`	`+ f"got metric: {metric_name}={metric}."`
`168`	`169`	`)`
`169`	`170`	`return -1`
`170`	`171`	`return (-1 if key_metric_negative_sign else 1) * metric`
Original file line number	Diff line number	Diff line change
`@@ -49,6 +49,7 @@ def test_saved_content(self, test_data, meta_data, output_ext, resample):`
`49`	`49`	`output_ext=output_ext,`
`50`	`50`	`resample=resample,`
`51`	`51`	`separate_folder=False, # test saving into the same folder`
	`52`	`+ output_name_formatter=lambda x, xform: dict(subject=x["filename_or_obj"] if x else "0"),`
`52`	`53`	`)`
`53`	`54`	`trans(test_data)`
`54`	`55`