atqy
diff --git a/‎tests/zero_code_change/pt_utils.py
Lines changed: 45 additions & 0 deletions b/‎tests/zero_code_change/pt_utils.py
Lines changed: 45 additions & 0 deletions
diff --git a/‎tests/zero_code_change/pytorch_integration_tests.py
Lines changed: 93 additions & 0 deletions b/‎tests/zero_code_change/pytorch_integration_tests.py
Lines changed: 93 additions & 0 deletions
diff --git a/‎tests/zero_code_change/tensorflow_integration_tests.py
Lines changed: 152 additions & 0 deletions b/‎tests/zero_code_change/tensorflow_integration_tests.py
Lines changed: 152 additions & 0 deletions
@@ -0,0 +1,45 @@
+from typing import Tuple
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+import torchvision
+import torchvision.transforms as transforms
+
+
+def get_dataloaders(
+    root: str = "./data", batch_size: int = 4, num_workers: int = 2
+) -> Tuple[torch.utils.data.DataLoader, torch.utils.data.DataLoader]:
+    """Build the CIFAR-10 training and test data loaders.
+
+    Both datasets are downloaded into ``root`` if needed and share the same
+    transform: conversion to tensor followed by per-channel normalization
+    with mean 0.5 and std 0.5.
+
+    Args:
+        root: Directory the CIFAR-10 data is downloaded to / read from.
+        batch_size: Number of samples per mini-batch for both loaders.
+        num_workers: Number of loader worker processes for both loaders.
+
+    Returns:
+        A ``(trainloader, testloader)`` tuple. The training loader shuffles
+        its samples; the test loader does not.
+    """
+    transform = transforms.Compose(
+        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
+    )
+
+    def _make_loader(train: bool) -> torch.utils.data.DataLoader:
+        # Only the training split is shuffled, so `train` doubles as `shuffle`.
+        dataset = torchvision.datasets.CIFAR10(
+            root=root, train=train, download=True, transform=transform
+        )
+        return torch.utils.data.DataLoader(
+            dataset, batch_size=batch_size, shuffle=train, num_workers=num_workers
+        )
+
+    return _make_loader(train=True), _make_loader(train=False)
+
+
+class Net(nn.Module):
+    """Small convolutional classifier with two conv/pool stages and three linear layers.
+
+    The first convolution takes 3 input channels and the last linear layer
+    produces 10 outputs. The flattened feature size (16 * 5 * 5) corresponds
+    to 32x32 inputs after two 5x5 convolutions, each followed by 2x2 pooling.
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Layers are created in the same order as before so that parameter
+        # initialization consumes the random number generator identically.
+        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
+        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
+        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
+        self.fc2 = nn.Linear(in_features=120, out_features=84)
+        self.fc3 = nn.Linear(in_features=84, out_features=10)
+
+    def forward(self, x):
+        """Return the 10-way output scores for the input batch ``x``."""
+        # Two convolution -> ReLU -> max-pool stages.
+        features = x
+        for conv in (self.conv1, self.conv2):
+            features = self.pool(F.relu(conv(features)))
+
+        # Flatten to (batch, 16 * 5 * 5), i.e. the input width of fc1.
+        flat = features.view(-1, self.fc1.in_features)
+
+        hidden = F.relu(self.fc1(flat))
+        hidden = F.relu(self.fc2(hidden))
+        return self.fc3(hidden)
@@ -0,0 +1,93 @@
+"""
+WARNING: This must be run manually, with the custom PyTorch fork installed.
+Not used in CI/CD. May be useful for DLC testing.
+
+We'll import a forked version of PyTorch, then run the CIFAR-10 tutorial at
+https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html.
+This should work without changing anything from the tutorial.
+Afterwards, we read from the directory and ensure that all the values are there.
+"""
+import argparse
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+
+import tornasole.pytorch as ts
+from tornasole.core.utils import SagemakerSimulator
+from pt_utils import get_dataloaders, Net
+
+
+def test_pytorch(script_mode: bool):
+    """Train the tutorial network briefly and check that Tornasole saved tensors.
+
+    Training runs for one epoch and stops after 2000 mini-batches. Afterwards
+    the trial at the simulator's output directory is opened and the hook's
+    "weights", "gradients" and "losses" collections are checked.
+
+    Args:
+        script_mode: If True, a ``TornasoleHook`` is created here and
+            registered on the network and the loss module. If False, no hook
+            is created by this function (zero-code-change mode) and the hook
+            is looked up after training.
+
+    Raises:
+        AssertionError: If no hook is available, if any of the checked
+            collections is empty, or if a tensor of the "losses" collection
+            is missing from the trial.
+    """
+    with SagemakerSimulator() as sim:
+        trainloader, _ = get_dataloaders()
+        net = Net()
+        criterion = nn.CrossEntropyLoss()
+        optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
+
+        # Bind `hook` on every path; previously it was undefined outside
+        # script mode and the checks below failed with NameError.
+        hook = None
+        if script_mode:
+            hook = ts.TornasoleHook(out_dir=sim.out_dir)
+            hook.register_hook(net)
+            hook.register_loss(criterion)
+
+        for epoch in range(1):  # single pass over the dataset
+            running_loss = 0.0
+            for i, (inputs, labels) in enumerate(trainloader):
+                # zero the parameter gradients
+                optimizer.zero_grad()
+
+                # forward + backward + optimize
+                outputs = net(inputs)
+                loss = criterion(outputs, labels)
+                loss.backward()
+                optimizer.step()
+
+                # print statistics, then stop after the first 2000 mini-batches
+                running_loss += loss.item()
+                if i % 2000 == 1999:
+                    print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, running_loss / 2000))
+                    running_loss = 0.0
+                    break
+
+        print("Finished Training")
+
+        from tornasole.trials import Trial, create_trial
+        import tornasole_rules
+
+        if hook is None:
+            # NOTE(review): assumes tornasole.pytorch exposes get_hook() the
+            # same way tornasole.tensorflow does in the TensorFlow tests --
+            # confirm before relying on the zero-code-change path.
+            hook = ts.get_hook()
+        assert hook is not None, "Hook was not created."
+
+        trial = create_trial(path=sim.out_dir)
+        saved_tensors = trial.tensors()
+        print(f"trial.available_steps() = {trial.available_steps()}")
+        print(f"trial.tensors() = {saved_tensors}")
+
+        print(f"collection_manager = {hook.collection_manager}")
+
+        for collection_name in ("weights", "gradients", "losses"):
+            tensor_names = hook.collection_manager.get(collection_name).tensor_names
+            print(f"'{collection_name}' collection tensors = {tensor_names}")
+            assert len(tensor_names) > 0, f"No tensors in '{collection_name}' collection."
+
+        # Every loss tensor known to the hook must have been written to the trial.
+        losses_tensors = hook.collection_manager.get("losses").tensor_names
+        assert all(name in saved_tensors for name in losses_tensors)
+
+
+if __name__ == "__main__":
+    # Command-line entry point: --script-mode selects manual hook creation;
+    # without it the test relies on zero-code-change (ZCC) behavior.
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        "--script-mode",
+        action="store_true",
+        help="Manually create hooks instead of relying on ZCC",
+    )
+    cli_args = cli.parse_args()
+
+    test_pytorch(script_mode=cli_args.script_mode)
@@ -0,0 +1,152 @@
+"""
+WARNING: This must be run manually, with the custom TensorFlow fork installed.
+Not used in CI/CD. May be useful for DLC testing.
+
+Be sure to run with Python2 (/usr/bin/python) and Python3.
+Run with and without the flag --script-mode.
+
+Test with DNNClassifier and raw Estimator.
+Test with Session.
+Test with Keras.
+
+Test with AdamOptimizer and SGD.
+
+We check that certain tensors are saved.
+"""
+
+import argparse
+import numpy as np
+import random
+import tensorflow as tf
+import tornasole.tensorflow as ts
+from tornasole.core.utils import SagemakerSimulator
+from tf_utils import (
+    get_estimator,
+    get_input_fns,
+    get_train_op_and_placeholders,
+    get_data,
+    get_keras_data,
+    get_keras_model_v1,
+)
+
+
+def test_estimator(script_mode: bool):
+    """ Train and evaluate the estimator from `get_estimator`, then check saved tensors.
+
+    Known issue noted by the original author: throws errors about tensors not
+    saving to collection. Investigate after merging PR #304.
+
+    Args:
+        script_mode: If True, a ``TornasoleEstimatorHook`` is created and
+            passed to both ``train`` and ``evaluate``. If False, no hooks are
+            passed (zero-code-change mode).
+
+    Raises:
+        AssertionError: If no hook exists afterwards, or the trial has no
+            steps or no tensors.
+    """
+    with SagemakerSimulator() as sim:
+        # Setup
+        mnist_classifier = get_estimator()
+        train_input_fn, eval_input_fn = get_input_fns()
+
+        # In script mode the hook must actually be handed to the estimator;
+        # previously it was created but never used.
+        hooks = [ts.TornasoleEstimatorHook(out_dir=sim.out_dir)] if script_mode else None
+
+        # Train and evaluate
+        mnist_classifier.train(input_fn=train_input_fn, steps=50, hooks=hooks)
+        mnist_classifier.evaluate(input_fn=eval_input_fn, steps=10, hooks=hooks)
+
+        # Check that hook created and tensors saved
+        trial = ts.create_trial(path=sim.out_dir)
+        assert ts.get_hook() is not None, "Hook was not created."
+        assert len(trial.available_steps()) > 0, "Nothing saved at any step."
+        assert len(trial.tensors()) > 0, "Tensors were not saved."
+
+
+def test_linear_classifier(script_mode: bool):
+    """ Train a `LinearClassifier` for 100 steps, then check saved tensors.
+
+    Known issue noted by the original author: throws errors about tensors not
+    saving to collection. Investigate after merging PR #304.
+
+    Args:
+        script_mode: If True, a ``TornasoleEstimatorHook`` is created and
+            passed to ``train``. If False, no hooks are passed
+            (zero-code-change mode).
+
+    Raises:
+        AssertionError: If no hook exists afterwards, or the trial has no
+            steps or no tensors.
+    """
+    with SagemakerSimulator() as sim:
+        # Setup
+        train_input_fn, eval_input_fn = get_input_fns()
+        x_feature = tf.feature_column.numeric_column("x", shape=(28, 28))
+        estimator = tf.compat.v1.estimator.LinearClassifier(
+            feature_columns=[x_feature], model_dir="/tmp/mnist_linear_classifier", n_classes=10
+        )
+
+        # Only script mode creates a hook; the zero-code-change path used to
+        # reference an undefined `hook` and failed with NameError.
+        hooks = [ts.TornasoleEstimatorHook(out_dir=sim.out_dir)] if script_mode else None
+
+        # Train
+        estimator.train(input_fn=train_input_fn, steps=100, hooks=hooks)
+
+        # Check that hook created and tensors saved
+        trial = ts.create_trial(path=sim.out_dir)
+        assert ts.get_hook() is not None, "Hook was not created."
+        assert len(trial.available_steps()) > 0, "Nothing saved at any step."
+        assert len(trial.tensors()) > 0, "Tensors were not saved."
+
+
+def test_monitored_session(script_mode: bool):
+    """ Run 100 training steps in a `MonitoredSession`, then check saved tensors.
+
+    Original author's note: works as intended.
+
+    Args:
+        script_mode: If True, a hook is created and passed to the session.
+            If False, the session is created without hooks.
+    """
+    with SagemakerSimulator() as sim:
+        train_op, X, Y = get_train_op_and_placeholders()
+        init = tf.compat.v1.global_variables_initializer()
+        mnist = get_data()
+
+        session_kwargs = {}
+        if script_mode:
+            # NOTE(review): a Keras hook is passed to MonitoredSession here;
+            # a session-style hook may have been intended -- confirm.
+            hook = ts.TornasoleKerasHook(out_dir=sim.out_dir)
+            session_kwargs["hooks"] = [hook]
+        sess = tf.train.MonitoredSession(**session_kwargs)
+
+        with sess:
+            sess.run(init)
+            for _ in range(100):
+                images, targets = mnist.train.next_batch(32)
+                sess.run(train_op, feed_dict={X: images, Y: targets})
+
+        # Check that hook created and tensors saved
+        trial = ts.create_trial(path=sim.out_dir)
+        hook_exists = ts.get_hook() is not None
+        assert hook_exists, "Hook was not created."
+        assert len(trial.available_steps()) > 0, "Nothing saved at any step."
+        assert len(trial.tensors()) > 0, "Tensors were not saved."
+
+
+def test_keras_v1(script_mode: bool):
+    """ Fit and evaluate the model from `get_keras_model_v1`, then check saved tensors.
+
+    Original author's note: failing because we need TornasoleKerasHook from PR #304.
+
+    Taken from https://www.tensorflow.org/guide/keras/functional.
+
+    Args:
+        script_mode: If True, a ``TornasoleKerasHook`` is created and passed
+            as a callback to both ``fit`` and ``evaluate``. If False, no
+            callbacks are passed.
+    """
+    with SagemakerSimulator() as sim:
+        import tensorflow.compat.v1.keras as keras
+
+        model = get_keras_model_v1()
+        (x_train, y_train), (x_test, y_test) = get_keras_data()
+
+        model.compile(
+            loss="sparse_categorical_crossentropy",
+            optimizer=keras.optimizers.RMSprop(),
+            metrics=["accuracy"],
+        )
+
+        # The callbacks keyword is only supplied in script mode, so the
+        # zero-code-change calls are exactly the plain fit/evaluate calls.
+        callback_kwargs = {}
+        if script_mode:
+            hook = ts.TornasoleKerasHook(out_dir=sim.out_dir)
+            callback_kwargs["callbacks"] = [hook]
+
+        model.fit(
+            x_train, y_train, batch_size=64, epochs=5, validation_split=0.2, **callback_kwargs
+        )
+        model.evaluate(x_test, y_test, verbose=2, **callback_kwargs)
+
+        # Check that hook created and tensors saved
+        trial = ts.create_trial(path=sim.out_dir)
+        hook_exists = ts.get_hook() is not None
+        assert hook_exists, "Hook was not created."
+        assert len(trial.available_steps()) > 0, "Nothing saved at any step."
+        assert len(trial.tensors()) > 0, "Tensors were not saved."
+
+
+if __name__ == "__main__":
+    # Command-line entry point: --script-mode selects manual hook creation;
+    # without it the tests rely on zero-code-change (ZCC) behavior.
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--script-mode",
+        action="store_true",
+        help="Manually create hooks instead of relying on ZCC",
+    )
+    args = parser.parse_args()
+    script_mode = args.script_mode
+
+    # Run order is the same as before: estimator, session, linear classifier, Keras.
+    for run_test in (
+        test_estimator,
+        test_monitored_session,
+        test_linear_classifier,
+        test_keras_v1,
+    ):
+        run_test(script_mode=script_mode)