Add attributes for PT ZCC (aws#289)

jarednielsen · web-flow · commit 871eb74930d0 · 2019-10-22T14:19:26.000-07:00
diff --git a/tests/pytorch/test_distributed_training.py b/tests/pytorch/test_distributed_training.py
@@ -8,6 +8,7 @@
 """
 import numpy as nn
 import os
+import pytest
 import torch
 import torch.distributed as dist
 from torch.multiprocessing import Process
@@ -110,6 +111,7 @@ def init_processes(rank, size, fn, backend="gloo"):
     fn(rank, size)
 
 
+@pytest.mark.slow  # 0:05 to run
 def test_run_net_single_process():
     """Runs a single linear layer."""
     ts.reset_collections()
diff --git a/tests/pytorch/test_loss.py b/tests/pytorch/test_loss.py
@@ -31,6 +31,7 @@ def forward(self, x):
         return x
 
 
+@pytest.mark.slow  # 0:05 to run
 def test_register_loss():
     """Test that the loss is saved as a tensor."""
     ts.reset_collections()
diff --git a/tornasole/pytorch/collection.py b/tornasole/pytorch/collection.py
@@ -24,7 +24,7 @@ def _register_default_collections(self):
         self.get(CollectionKeys.WEIGHTS).include("^(?!gradient).*weight")
         self.get(CollectionKeys.BIASES).include("^(?!gradient).*bias")
         self.get(CollectionKeys.GRADIENTS).include("^gradient")
-        self.get(CollectionKeys.LOSSES).include("Loss")
+        self.get(CollectionKeys.LOSSES).include("[Ll]oss")
 
     def create_collection(self, name):
         super().create_collection(name, cls=Collection)
diff --git a/tornasole/pytorch/hook.py b/tornasole/pytorch/hook.py
@@ -1,4 +1,6 @@
 from copy import deepcopy
+import types
+from typing import Callable, Union
 import torch
 import torch.distributed as dist
 from tornasole.core.json_config import (
@@ -53,6 +55,9 @@ def __init__(
         self.model = None
         self.exported_model = False
 
+        self.has_registered_module = False
+        self.has_registered_loss_module = False
+
         set_hook(self)
 
     def get_num_workers(self):
@@ -155,6 +160,14 @@ def forward_pre_hook(self, module, inputs):
             self.export_collections()
             self.exported_collections = True
 
+    def record_tensor_value(self, tensor_name: str, tensor_value: torch.Tensor) -> None:
+        """Record the tensor returned by a functional call, such as F.mse_loss()."""
+        assert isinstance(
+            tensor_value, torch.Tensor
+        ), f"tensor_value={tensor_value} must be torch.Tensor"
+
+        self._write_outputs(tensor_name, tensor_value)
+
     # This hook is invoked by trainer after running the forward pass.
     def forward_hook(self, module, inputs, outputs):
         if not self._get_collections_to_save_for_step():
@@ -228,6 +241,8 @@ def register_hook(self, module):
         # Capture the gradient for each parameter in the net
         self._backward_apply(module)
 
+        self.has_registered_module = True
+
     def register_loss(self, loss_module):
         """Register something like `criterion = nn.CrossEntropyLoss()`."""
         # Typechecking
@@ -240,6 +255,7 @@ def register_loss(self, loss_module):
         self.module_maps[loss_module] = name
         # Add a callback to the forward pass
         loss_module.register_forward_hook(self.forward_hook)
+        self.has_registered_loss_module = True
 
     @staticmethod
     def _get_reduction_of_data(reduction_name, tensor_value, tensor_name, abs):