# Copyright (c) MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union

import numpy as np
import torch
import torch.nn.functional as F

from monai.inferers import SlidingWindowInferer
from monai.inferers.utils import sliding_window_inference
from monai.utils import BlendMode, PytorchPadMode, look_up_option

__all__ = ["SlidingWindowHoVerNetInferer"]


class SlidingWindowHoVerNetInferer(SlidingWindowInferer):
    """
    Sliding window method for HoVerNet model inference,
    with `sw_batch_size` windows for every model.forward().
    Usage example can be found in the :py:class:`monai.inferers.Inferer` base class.

    Args:
        roi_size: the window size to execute SlidingWindow evaluation.
            If it has non-positive components, the corresponding `inputs` size will be used instead.
            For example, `roi_size=(32, -1)` will be adapted to `(32, 64)` if the second spatial
            dimension size of the input image is `64`.
        sw_batch_size: the batch size to run window slices.
        overlap: amount of overlap between scans.
        mode: {``"constant"``, ``"gaussian"``}
            How to blend output of overlapping windows. Defaults to ``"constant"``.

            - ``"constant"``: gives equal weight to all predictions.
            - ``"gaussian"``: gives less weight to predictions on edges of windows.

        sigma_scale: the standard deviation coefficient of the Gaussian window when `mode` is ``"gaussian"``.
            Default: 0.125. Actual window sigma is ``sigma_scale`` * ``dim_size``.
            When sigma_scale is a sequence of floats, the values denote sigma_scale at the corresponding
            spatial dimensions.
        padding_mode: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}
            Padding mode when ``roi_size`` is larger than inputs. Defaults to ``"constant"``.
            See also: https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
        cval: fill value for ``"constant"`` padding mode. Default: 0.
        sw_device: device for the window data.
            By default the device (and accordingly the memory) of the `inputs` is used.
            Normally `sw_device` should be consistent with the device where `predictor` is defined.
        device: device for the stitched output prediction.
            By default the device (and accordingly the memory) of the `inputs` is used. If for example
            set to device=torch.device('cpu') the gpu memory consumption is less and independent of the
            `inputs` and `roi_size`. Output is on the `device`.
        progress: whether to print a tqdm progress bar.
        cache_roi_weight_map: whether to pre-compute the ROI weight map.
        cpu_thresh: when provided, dynamically switch to stitching on cpu (to save gpu memory)
            when the input image volume is larger than this threshold (in pixels/voxels).
            Otherwise use ``device``. Thus, the output may end up on either cpu or gpu.
        extra_input_padding: the amount of padding for the input image, given as a tuple with an even
            number of pad values. Refer to the `pad` argument of `torch.nn.functional.pad` for more details.

    Note:
        ``sw_batch_size`` denotes the max number of windows per network inference iteration,
        not the batch size of inputs.

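    Example (a minimal sketch; the window size, padding amounts, and ``net`` are
    illustrative placeholders rather than values prescribed by this class)::

        inferer = SlidingWindowHoVerNetInferer(
            roi_size=(256, 256),
            sw_batch_size=4,
            extra_input_padding=(46, 46, 46, 46),  # torch.nn.functional.pad order
        )
        output = inferer(image, net)  # stitched prediction(s), cropped back to the image size
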
    """

    def __init__(
        self,
        roi_size: Union[Sequence[int], int],
        sw_batch_size: int = 1,
        overlap: float = 0.25,
        mode: Union[BlendMode, str] = BlendMode.CONSTANT,
        sigma_scale: Union[Sequence[float], float] = 0.125,
        padding_mode: Union[PytorchPadMode, str] = PytorchPadMode.CONSTANT,
        cval: float = 0.0,
        sw_device: Optional[Union[torch.device, str]] = None,
        device: Optional[Union[torch.device, str]] = None,
        progress: bool = False,
        cache_roi_weight_map: bool = False,
        cpu_thresh: Optional[int] = None,
        extra_input_padding: Optional[Tuple[int, ...]] = None,
    ) -> None:
        super().__init__(
            roi_size=roi_size,
            sw_batch_size=sw_batch_size,
            overlap=overlap,
            mode=mode,
            sigma_scale=sigma_scale,
            padding_mode=padding_mode,
            cval=cval,
            sw_device=sw_device,
            device=device,
            progress=progress,
            cache_roi_weight_map=cache_roi_weight_map,
            cpu_thresh=cpu_thresh,
        )
        # spatial padding applied to the inputs before sliding-window inference,
        # given in torch.nn.functional.pad order (last spatial dimension first)
        self.extra_input_padding = extra_input_padding

    def process_output(self, seg_prob_tuple, window_data, importance_map_):
        """Pad window outputs back to the window size and zero the importance map over the padded area."""
        window_shape = window_data.shape[2:]
        seg_shape = seg_prob_tuple[0].shape[2:]

        window_pad_size = []
        window_pad_slices = []
        for window_s, output_s in zip(window_shape, seg_shape):
            # HoVerNet outputs can be spatially smaller than the input window, so compute
            # the symmetric padding needed to restore each output to the window size
            pad_width = max(window_s - output_s, 0)
            pad_half_1 = pad_width // 2
            pad_half_2 = pad_width - pad_half_1
            window_pad_size.extend([pad_half_1, pad_half_2])
            window_pad_slices.append(slice(pad_half_1, window_s - pad_half_2))

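        # e.g. with an illustrative (256, 256) window and a (164, 164) output:
        # pad_width = 92 per dimension, the pad halves are (46, 46), and the kept
        # region of the importance map is [46:210, 46:210]
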
        # Make the padding area of the importance map zero
        importance_map = torch.zeros(window_shape, dtype=importance_map_.dtype, device=importance_map_.device)
        importance_map[tuple(window_pad_slices)] = importance_map_[tuple(window_pad_slices)]

        # note: F.pad applies `pad` starting from the last dimension; the window/output size
        # difference is assumed equal across spatial dims here, so the pair ordering is equivalent
        seg_prob_tuple = tuple(
            F.pad(seg_prob, pad=tuple(window_pad_size), mode=self.padding_mode, value=self.cval)
            for seg_prob in seg_prob_tuple
        )

        return seg_prob_tuple, importance_map

    def __call__(
        self,
        inputs: torch.Tensor,
        network: Callable[..., Union[torch.Tensor, Sequence[torch.Tensor], Dict[Any, torch.Tensor]]],
        *args: Any,
        **kwargs: Any,
    ) -> Union[torch.Tensor, Tuple[torch.Tensor, ...], Dict[Any, torch.Tensor]]:
        """

        Args:
            inputs: model input data for inference.
            network: target model to execute inference.
                supports callables such as ``lambda x: my_torch_model(x, additional_config)``
            args: optional args to be passed to ``network``.
            kwargs: optional keyword args to be passed to ``network``.

        """

        device = self.device
        if device is None and self.cpu_thresh is not None and inputs.shape[2:].numel() > self.cpu_thresh:
            device = "cpu"  # stitch in cpu memory if image is too large

        if self.extra_input_padding:
            image_size_original = inputs.shape[2:]
            num_spatial_dims = len(image_size_original)
            inputs = F.pad(
                inputs,
                pad=tuple(self.extra_input_padding),
                mode=look_up_option(self.padding_mode, PytorchPadMode),
                value=self.cval,
            )

        results = sliding_window_inference(
            inputs,
            self.roi_size,
            self.sw_batch_size,
            network,
            self.overlap,
            self.mode,
            self.sigma_scale,
            self.padding_mode,
            self.cval,
            self.sw_device,
            device,
            self.progress,
            self.roi_weight_map,
            self.process_output,
            *args,
            **kwargs,
        )

        if self.extra_input_padding:
            # build slices that crop the stitched results back to the original image size;
            # `extra_input_padding` follows torch.nn.functional.pad order (last spatial dim first)
            extra_slicing: List[slice] = []
            num_padded_dims = len(self.extra_input_padding) // 2
            for sp in range(num_padded_dims):
                slice_dim = slice(
                    self.extra_input_padding[sp * 2],
                    image_size_original[num_spatial_dims - sp - 1] + self.extra_input_padding[sp * 2],
                )
                extra_slicing.insert(0, slice_dim)
            # keep the batch and channel dimensions untouched
            for _ in range(len(inputs.shape) - num_padded_dims):
                extra_slicing.insert(0, slice(None))
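            # e.g. for a 2D input with original spatial size (64, 64) and a hypothetical
            # extra_input_padding=(2, 2, 3, 3), the computed indexing is
            # (slice(None), slice(None), slice(3, 67), slice(2, 66)), i.e. [:, :, 3:67, 2:66]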

            # index with a tuple: indexing tensors/arrays with a list of slices is deprecated
            if isinstance(results, dict):
                for k, v in results.items():
                    results[k] = v[tuple(extra_slicing)]
            elif isinstance(results, (list, tuple)):
                results = type(results)([res[tuple(extra_slicing)] for res in results])
            elif isinstance(results, (torch.Tensor, np.ndarray)):
                results = results[tuple(extra_slicing)]
            else:
                raise ValueError(
                    f"The output [{type(results)}] should be either dict, list, tuple, torch.Tensor, or numpy array."
                )

        return results