Skip to content

Commit 18b8094

Browse files
authored
Replace 'collections.ts' with COLLECTIONS_FILE_NAME (aws#233)
Replace 'collections.ts' with COLLECTIONS_FILE_NAME
1 parent: 09ca805 · commit: 18b8094

File tree

18 files changed: 55 additions and 44 deletions.

tests/analysis/rules/test_confusion.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from tornasole.core.writer import FileWriter
2-
from tornasole.core.collection_manager import CollectionManager
2+
from tornasole.core.collection_manager import CollectionManager, COLLECTIONS_FILE_NAME
33

44
from tornasole.rules.generic import Confusion
55
from tornasole.trials import create_trial
@@ -20,7 +20,7 @@ def gen_y_and_y_hat( path, trial, step, y, y_name, y_hat, y_hat_name, colls = {}
2020
for coll in colls:
2121
c.add(coll)
2222
c.get(coll).tensor_names = colls[coll]
23-
c.export(os.path.join(trial_dir, "collections.ts"))
23+
c.export(os.path.join(trial_dir, COLLECTIONS_FILE_NAME))
2424

2525

2626
def test_confusion():

tests/analysis/trials/test_modes.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from tornasole.core.tensor import StepState
66
from datetime import datetime
77
from tornasole.core.writer import FileWriter
8-
from tornasole.core.collection_manager import CollectionManager
8+
from tornasole.core.collection_manager import CollectionManager, COLLECTIONS_FILE_NAME
99

1010
def test_modes_on_global_data():
1111
pass # other tests in create, local, s3 do this
@@ -18,7 +18,7 @@ def test_mode_data():
1818
c = CollectionManager()
1919
c.add("default")
2020
c.get("default").tensor_names = ["arr"]
21-
c.export(os.path.join(trial_dir, "collections.ts"))
21+
c.export(os.path.join(trial_dir, COLLECTIONS_FILE_NAME))
2222
tr = create_trial(trial_dir)
2323
for s in range(0, 10):
2424
fw = FileWriter(trial_dir=trial_dir, step=s)

tests/analysis/trials/test_s3.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33

44
from tornasole.core.access_layer.s3handler import *
55
from tornasole.trials import S3Trial
6-
from tornasole.core.collection_manager import CollectionManager
6+
from tornasole.core.collection_manager import CollectionManager, \
7+
COLLECTIONS_FILE_NAME
78
import uuid
89
import os
910
from tornasole.core.utils import is_s3
@@ -48,8 +49,8 @@ def help_test_multiple_trials(num_steps = 20, num_tensors = 10):
4849
c = CollectionManager()
4950
c.add("default")
5051
c.get("default").tensor_names = ["foo_" + str(i) for i in range(num_tensors)]
51-
c.export(path + trial_name + "/collections.ts")
52-
c.export(path + trial_name + "/collections.ts")
52+
c.export(path + trial_name + "/" + COLLECTIONS_FILE_NAME)
53+
c.export(path + trial_name + "/" + COLLECTIONS_FILE_NAME)
5354
for i in range(num_steps):
5455
generate_data(path=path, trial=trial_name, num_tensors=num_tensors,
5556
step=i, tname_prefix='foo', worker='algo-1', shape=(3, 3, 3), rank=0)

tests/analysis/utils.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from tornasole.core.writer import FileWriter
22
import numpy as np
3-
from tornasole.core.collection_manager import CollectionManager
3+
from tornasole.core.collection_manager import CollectionManager, \
4+
COLLECTIONS_FILE_NAME
45
import os
56

67

@@ -21,7 +22,7 @@ def generate_data(path, trial, step, tname_prefix,
2122
c.get("default").tensor_names = [f'{tname_prefix}_{i}' for i in range(num_tensors)]
2223
c.add('gradients')
2324
c.get("gradients").tensor_names = [f'{tname_prefix}_{i}' for i in range(num_tensors)]
24-
c.export(os.path.join(path, trial, "collections.ts"))
25+
c.export(os.path.join(path, trial, COLLECTIONS_FILE_NAME))
2526

2627

2728
def check_trial(trial_obj, num_steps, num_tensors):

tests/core/test_collections.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from tornasole.core.collection import Collection
2-
from tornasole.core.collection_manager import CollectionManager
2+
from tornasole.core.collection_manager import CollectionManager, \
3+
COLLECTIONS_FILE_NAME
34
from tornasole.core.reduction_config import ReductionConfig
45

56
def test_export_load():
@@ -20,8 +21,8 @@ def test_manager_export_load():
2021
cm.add(Collection('trial1'))
2122
cm.add('trial2')
2223
cm.get('trial2').include('total_loss')
23-
cm.export('cm.ts')
24-
cm2 = CollectionManager.load('cm.ts')
24+
cm.export(COLLECTIONS_FILE_NAME)
25+
cm2 = CollectionManager.load(COLLECTIONS_FILE_NAME)
2526
assert cm == cm2
2627

2728
def test_manager():

tests/tensorflow/hooks/test_save_all_full.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import shutil, glob
44
from tornasole.core.reader import FileReader
55
from tornasole.core.json_config import TORNASOLE_CONFIG_FILE_PATH_ENV_STR
6+
from tornasole.core.collection_manager import COLLECTIONS_FILE_NAME
67

78

89
def test_save_all_full(hook=None, trial_dir=None):
@@ -28,18 +29,15 @@ def test_save_all_full(hook=None, trial_dir=None):
2829
assert len(coll['gradients'].tensor_names) == 1
2930
assert len(coll['losses'].tensor_names) == 1
3031

31-
assert 'collections.ts' in files
32-
cm = CollectionManager.load(join(trial_dir, 'collections.ts'))
32+
assert COLLECTIONS_FILE_NAME in files
33+
cm = CollectionManager.load(join(trial_dir, COLLECTIONS_FILE_NAME))
3334

3435
assert len(cm.collections) == 6
3536
assert len(cm.collections['weights'].tensor_names) == 1
36-
assert len(cm.collections['weights'].reduction_tensor_names) == 0
3737
assert len(cm.collections['losses'].tensor_names) == 1
3838
assert len(cm.collections['gradients'].tensor_names) == 1
39-
assert len(cm.collections['gradients'].reduction_tensor_names) == 0
4039
# as we hadn't asked to be saved
4140
assert len(cm.collections['optimizer_variables'].tensor_names) == 0
42-
assert len(cm.collections['optimizer_variables'].reduction_tensor_names) == 0
4341
assert len(cm.collections['all'].tensor_names) == 106
4442
num_tensors_loaded_collection = len(cm.collections['weights'].tensor_names) + \
4543
len(cm.collections['gradients'].tensor_names)

tests/tensorflow/hooks/test_save_reductions.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,20 @@
22
from tornasole.tensorflow import reset_collections, get_collections, CollectionManager
33
import shutil
44
import glob
5+
from tornasole.core.collection_manager import COLLECTIONS_FILE_NAME
56
from tornasole.core.reader import FileReader
67
from tornasole.core.json_config import TORNASOLE_CONFIG_FILE_PATH_ENV_STR
78

89
def helper_save_reductions(trial_dir, hook):
910
simple_model(hook)
1011
_, files = get_dirs_files(trial_dir)
1112
coll = get_collections()
12-
1313
assert len(coll) == 5
1414
assert len(coll['weights'].reduction_tensor_names) == 1
1515
assert len(coll['gradients'].reduction_tensor_names) == 1
1616

17-
assert 'collections.ts' in files
18-
cm = CollectionManager.load(join(trial_dir, 'collections.ts'))
17+
assert COLLECTIONS_FILE_NAME in files
18+
cm = CollectionManager.load(join(trial_dir, COLLECTIONS_FILE_NAME))
1919
assert len(cm.collections) == 5
2020
assert len(cm.collections['weights'].tensor_names) == 0
2121
assert len(cm.collections['weights'].reduction_tensor_names) == 1
@@ -24,8 +24,8 @@ def helper_save_reductions(trial_dir, hook):
2424
# as we hadn't asked to be saved
2525
assert len(cm.collections['optimizer_variables'].tensor_names) == 0
2626
assert len(cm.collections['optimizer_variables'].reduction_tensor_names) == 0
27-
assert len(cm.collections['default'].tensor_names) == 0
2827
assert len(cm.collections['default'].reduction_tensor_names) == 0
28+
assert len(cm.collections['default'].tensor_names) == 0
2929
num_tensors_loaded_collection = len(cm.collections['weights'].tensor_names) + \
3030
len(cm.collections['gradients'].tensor_names) + \
3131
len(cm.collections['default'].tensor_names)

tests/tensorflow/hooks/test_simple_include.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import glob, shutil
55
from tornasole.core.reader import FileReader
66
from tornasole.core.json_config import TORNASOLE_CONFIG_FILE_PATH_ENV_STR
7+
from tornasole.core.collection_manager import COLLECTIONS_FILE_NAME
78

89

910
def helper_test_simple_include(trial_dir, hook):
@@ -12,7 +13,7 @@ def helper_test_simple_include(trial_dir, hook):
1213
_, files = get_dirs_files(trial_dir)
1314
steps, _ = get_dirs_files(os.path.join(trial_dir, 'events'))
1415

15-
cm = CollectionManager.load(join(trial_dir, 'collections.ts'))
16+
cm = CollectionManager.load(join(trial_dir, COLLECTIONS_FILE_NAME))
1617
assert len(cm.collections['default'].tensor_names) == 1
1718
assert len(steps) == 5
1819
for step in steps:
@@ -56,7 +57,7 @@ def helper_test_simple_include_regex(trial_dir, hook):
5657
_, files = get_dirs_files(trial_dir)
5758
steps, _ = get_dirs_files(os.path.join(trial_dir, 'events'))
5859

59-
cm = CollectionManager.load(join(trial_dir, 'collections.ts'))
60+
cm = CollectionManager.load(join(trial_dir, COLLECTIONS_FILE_NAME))
6061
assert len(cm.collections['default'].tensor_names) == 1
6162
assert len(steps) == 5
6263

@@ -103,7 +104,7 @@ def helper_test_multi_collection_match(trial_dir, hook):
103104
_, files = get_dirs_files(trial_dir)
104105
steps, _ = get_dirs_files(os.path.join(trial_dir, 'events'))
105106

106-
cm = CollectionManager.load(join(trial_dir, 'collections.ts'))
107+
cm = CollectionManager.load(join(trial_dir, COLLECTIONS_FILE_NAME))
107108
assert len(cm.collections['default'].tensor_names) == 1
108109
assert len(cm.collections['trial'].tensor_names) == 1
109110
assert len(steps) == 5

tests/tensorflow/hooks/test_weights_gradients.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from .utils import *
22
from tornasole.tensorflow import reset_collections
33
import tensorflow as tf
4+
from tornasole.core.collection_manager import COLLECTIONS_FILE_NAME
45
from tornasole.core.json_config import TORNASOLE_CONFIG_FILE_PATH_ENV_STR
56
import tornasole.tensorflow as ts
67
import shutil
@@ -11,8 +12,8 @@ def helper_test_only_w_g(trial_dir, hook):
1112
steps, _ = get_dirs_files(os.path.join(trial_dir, 'events'))
1213
_, files = get_dirs_files(trial_dir)
1314

14-
assert 'collections.ts' in files
15-
cm = CollectionManager.load(join(trial_dir, 'collections.ts'))
15+
assert COLLECTIONS_FILE_NAME in files
16+
cm = CollectionManager.load(join(trial_dir, COLLECTIONS_FILE_NAME))
1617
num_tensors_loaded_collection = len(cm.collections['weights'].tensor_names) + \
1718
len(cm.collections['gradients'].tensor_names) + \
1819
len(cm.collections['default'].tensor_names)

tests/tensorflow/hooks/test_when_nan.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from tornasole.core.collection_manager import COLLECTIONS_FILE_NAME
12
from .utils import *
23
from tornasole.core.json_config import TORNASOLE_CONFIG_FILE_PATH_ENV_STR
34

@@ -10,8 +11,8 @@ def helper_test_when_nan(trial_dir, hook):
1011
steps, _ = get_dirs_files(os.path.join(trial_dir, 'events'))
1112
_, files = get_dirs_files(trial_dir)
1213

13-
assert 'collections.ts' in files
14-
cm = CollectionManager.load(join(trial_dir, 'collections.ts'))
14+
assert COLLECTIONS_FILE_NAME in files
15+
cm = CollectionManager.load(join(trial_dir, COLLECTIONS_FILE_NAME))
1516
num_tensors_loaded_collection = len(cm.collections['weights'].tensor_names) + \
1617
len(cm.collections['gradients'].tensor_names) + \
1718
len(cm.collections['when_nan'].tensor_names) + \

tornasole/core/collection_manager.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
from .access_layer import TSAccessFile, TSAccessS3
33
from .utils import is_s3
44

5+
COLLECTIONS_FILE_NAME = 'collections.json'
6+
57
class CollectionManager:
68
"""
79
CollectionManager lets you manage group of collections.

tornasole/mxnet/hook.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from tornasole.core.json_config import TORNASOLE_CONFIG_DEFAULT_WORKER_NAME, create_hook_from_json_config
99
from tornasole.core.access_layer.utils import training_has_ended
1010
from tornasole.core.hook_utils import verify_and_get_out_dir
11+
from tornasole.core.collection_manager import COLLECTIONS_FILE_NAME
1112
from .mxnet_collection import get_collection_manager, get_collection
1213
from .util import get_aggregated_data, make_numpy_array
1314
import re as _re
@@ -19,7 +20,6 @@
1920
import atexit
2021

2122
INVALID_TAG_CHARACTERS = _re.compile(r'[^-/\w\.]')
22-
COLLECTION_FILE_NAME = 'collections.ts'
2323
INPUT_TENSOR_SUFFIX = '_input_'
2424
OUTPUT_TENSOR_SUFFIX = '_output'
2525
GRADIENT_PREFIX = 'gradient/'
@@ -100,7 +100,7 @@ def set_mode(self, mode):
100100

101101
def cleanup(self):
102102
if self.last_saved_step != -1:
103-
get_collection_manager().export_manager(os.path.join(self.out_dir, COLLECTION_FILE_NAME))
103+
get_collection_manager().export_manager(os.path.join(self.out_dir, COLLECTIONS_FILE_NAME))
104104
self.export_only_once = False
105105
# Write the gradients of the past step if the writer is still available.
106106
if self.writer is not None:
@@ -155,7 +155,7 @@ def forward_pre_hook(self, block, input):
155155
worker=self.worker)
156156

157157
if self.last_saved_step != -1 and self.export_only_once:
158-
get_collection_manager().export_manager(os.path.join(self.out_dir, COLLECTION_FILE_NAME))
158+
get_collection_manager().export_manager(os.path.join(self.out_dir, COLLECTIONS_FILE_NAME))
159159
self.export_only_once = False
160160
self.last_block = block
161161

@@ -222,7 +222,7 @@ def log_tensor(self, tensor_name, tensor_value):
222222
tensor_value_np = make_numpy_array(tensor_data)
223223
self.writer.write_tensor(tdata=tensor_value_np, tname=reduction_tensor_name,
224224
mode=self.mode, mode_step=self.mode_steps[self.mode])
225-
s_col.add_reduction_tensor_name(tensor_name)
225+
s_col.add_tensor_name(tensor_name)
226226
return
227227
else:
228228
tensor_value = make_numpy_array(tensor_value)

tornasole/pytorch/hook.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from tornasole.pytorch.torch_collection import get_collection_manager, get_collection
1111
from tornasole.pytorch.util import get_aggregated_data, make_numpy_array
1212
from tornasole.core.access_layer.utils import training_has_ended
13+
from tornasole.core.collection_manager import COLLECTIONS_FILE_NAME
1314

1415
import re as _re
1516
import logging
@@ -19,7 +20,6 @@
1920
import atexit
2021

2122
INVALID_TAG_CHARACTERS = _re.compile(r'[^-/\w\.]')
22-
COLLECTION_FILE_NAME = 'collections.ts'
2323
DEFAULT_WORKER_NAME = 'worker0'
2424
INPUT_TENSOR_SUFFIX = '_input_'
2525
OUTPUT_TENSOR_SUFFIX = '_output'
@@ -98,7 +98,7 @@ def set_mode(self, mode):
9898

9999
def cleanup(self):
100100
if not self.exported_collection:
101-
get_collection_manager().export_manager(os.path.join(self.out_dir, COLLECTION_FILE_NAME))
101+
get_collection_manager().export_manager(os.path.join(self.out_dir, COLLECTIONS_FILE_NAME))
102102
# Write the gradients of the past step if the writer is still available.
103103
if self.writer is not None:
104104
self.writer.flush()
@@ -150,7 +150,7 @@ def forward_pre_hook(self, module, input):
150150
self.log_tensor(tensor_name=pname, tensor_value=param.data)
151151

152152
if self.last_saved_step != -1 and not self.exported_collection:
153-
get_collection_manager().export_manager(os.path.join(self.out_dir, COLLECTION_FILE_NAME))
153+
get_collection_manager().export_manager(os.path.join(self.out_dir, COLLECTIONS_FILE_NAME))
154154
self.exported_collection = True
155155
# self.last_block = block
156156

tornasole/tensorflow/hook.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from tornasole.core.modes import ModeKeys, ALLOWED_MODES
1515
from tornasole.core.save_config import SaveConfig
1616
from tornasole.core.access_layer.utils import training_has_ended
17+
from tornasole.core.collection_manager import COLLECTIONS_FILE_NAME
1718
from .save_manager import TFSaveManager
1819

1920
DEFAULT_INCLUDE_COLLECTIONS = ['weights', 'gradients', 'default', 'losses']
@@ -234,7 +235,7 @@ def begin(self):
234235
self.logger.debug(f' Collection {coll.name} has tensors: {coll.tensors}')
235236
self.logger.debug(f' Collection {coll.name} has reductions: {coll.reduction_tensors_added}')
236237

237-
export_collections(os.path.join(self.out_dir, 'collections.ts'))
238+
export_collections(os.path.join(self.out_dir, COLLECTIONS_FILE_NAME))
238239
self._export_model()
239240

240241
def _export_model(self):

tornasole/tensorflow/keras.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from tornasole.core.modes import ModeKeys
1111
from tornasole.core.save_config import SaveConfig
1212
from tornasole.core.save_manager import SaveManager
13+
from tornasole.core.collection_manager import COLLECTIONS_FILE_NAME
1314

1415

1516
class TornasoleHook(keras.callbacks.Callback):
@@ -81,7 +82,7 @@ def _export_collections( self, logs):
8182

8283
add_to_collection("gradients", [])
8384

84-
export_collections(os.path.join(self.out_dir, 'collections.ts'))
85+
export_collections(os.path.join(self.out_dir, COLLECTIONS_FILE_NAME))
8586
# at this point we need all collections to be ready
8687
# this may not be the case at creation of hook
8788
# as user's code after hook might add collections

tornasole/trials/local_trial.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
from .trial import EventFileTensor, Trial
2-
from tornasole.core.locations import EventFileLocation
2+
33
from tornasole.core.utils import index, step_in_range
4-
from tornasole.core.collection_manager import CollectionManager
4+
from tornasole.core.locations import EventFileLocation
5+
from tornasole.core.collection_manager import CollectionManager, \
6+
COLLECTIONS_FILE_NAME
57
from tornasole.core.reader import FileReader
68
from tornasole.core.access_layer.utils import has_training_ended
79

@@ -57,7 +59,7 @@ def _load_tensors_from_event_files(self):
5759
self._read_step_dirs(step_dirs)
5860

5961
def _load_collections(self):
60-
collections_file_path = os.path.join(self.trial_dir, 'collections.ts')
62+
collections_file_path = os.path.join(self.trial_dir, COLLECTIONS_FILE_NAME)
6163
num_times_before_warning = 10
6264
while True:
6365
if os.path.exists(collections_file_path):

tornasole/trials/s3_trial.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
from tornasole.core.access_layer.utils import has_training_ended
66
from tornasole.core.locations import EventFileLocation
77
from tornasole.core.s3_utils import list_s3_objects
8-
from tornasole.core.collection_manager import CollectionManager
8+
from tornasole.core.locations import EventFileLocation
9+
from tornasole.core.collection_manager import CollectionManager, \
10+
COLLECTIONS_FILE_NAME
911
from tornasole.core.tfrecord.tensor_reader import TensorReader
1012
from tornasole.core.utils import step_in_range
1113

@@ -48,8 +50,7 @@ def training_ended(self):
4850
def _load_collections(self):
4951
num_times_before_warning = 10
5052
while True:
51-
# todo get this path from tornasole.core
52-
key = os.path.join(self.prefix_name, 'collections.ts')
53+
key = os.path.join(self.prefix_name, COLLECTIONS_FILE_NAME)
5354
collections_req = ReadObjectRequest(self._get_s3_location(key))
5455
obj_data = self.s3_handler.get_objects([collections_req])[0]
5556
if obj_data is None:

0 commit comments

Comments (0)