atqy
diff --git a/‎setup.py
Lines changed: 8 additions & 5 deletions b/‎setup.py
Lines changed: 8 additions & 5 deletions
diff --git a/‎tornasole/mxnet/graph.py
Lines changed: 96 additions & 0 deletions b/‎tornasole/mxnet/graph.py
Lines changed: 96 additions & 0 deletions
diff --git a/‎tornasole/mxnet/hook.py
Lines changed: 10 additions & 12 deletions b/‎tornasole/mxnet/hook.py
Lines changed: 10 additions & 12 deletions
diff --git a/‎tornasole/pytorch/_proto_graph.py
Lines changed: 50 additions & 0 deletions b/‎tornasole/pytorch/_proto_graph.py
Lines changed: 50 additions & 0 deletions
@@ -8,11 +8,14 @@
 
 
 def compile_summary_protobuf():
-    proto_path = 'tornasole/core/tfevent/proto'
-    proto_files = os.path.join(proto_path, '*.proto')
-    cmd = 'protoc ' + proto_files + ' --python_out=.'
-    print('compiling protobuf files in {}'.format(proto_path))
-    return os.system('set -ex &&' + cmd)
+    """Compile the protobuf definitions with a single `protoc` invocation.
+
+    The `*.proto` glob of every directory in `proto_paths` is appended to one
+    shell command that ends with `--python_out=.`.
+
+    Returns the value returned by `os.system` for that command.
+    """
+    proto_paths = ['tornasole/core/tfevent/proto', 'tornasole/pytorch/proto']
+    # `set -ex` makes the shell echo the command and abort on the first error.
+    cmd = 'set -ex && protoc '
+    for proto_path in proto_paths:
+        # The glob is not expanded here; it is passed as-is to the shell.
+        proto_files = os.path.join(proto_path, '*.proto')
+        cmd += proto_files + ' '
+        print('compiling protobuf files in {}'.format(proto_path))
+    cmd += ' --python_out=.'
+    return os.system(cmd)
 
 
 def get_framework_packages(f):
 
@@ -0,0 +1,96 @@
+from mxnet.ndarray import NDArray
+from mxnet.symbol import Symbol
+from mxnet.gluon import HybridBlock
+import json
+
+from tornasole.core.tfevent.proto.graph_pb2 import GraphDef
+from tornasole.core.tfevent.proto.node_def_pb2 import NodeDef
+from tornasole.core.tfevent.proto.versions_pb2 import VersionDef
+from tornasole.core.tfevent.proto.attr_value_pb2 import AttrValue
+
+
+def _scoped_name(scope_name, node_name):
+    """Return `node_name` prefixed by `scope_name`, joined with '/'."""
+    return '/'.join([scope_name, node_name])
+
+
+def _get_nodes_from_symbol(sym):
+    """Given a symbol, return a list of `NodeDef`s for visualizing the
+    graph in TensorBoard.
+
+    The symbol is serialized with `sym.tojson()` and the 'nodes' list of that
+    JSON is walked twice: once to record which operators consume each input
+    node, and once to emit one `NodeDef` per node with scoped names.
+
+    Raises `TypeError` if `sym` is not an `mxnet.symbol.Symbol`.
+    """
+    if not isinstance(sym, Symbol):
+        raise TypeError('sym must be an `mxnet.symbol.Symbol`,'
+                        ' received type {}'.format(str(type(sym))))
+    conf = json.loads(sym.tojson())
+    nodes = conf['nodes']
+    # key: index of an input node, value: indices of the operators consuming it.
+    # Every input index except 0 is recorded (operator outputs included); only
+    # the entries of non-operator ('null') nodes are consulted further down.
+    data2op = {}  # key: data id, value: list of ops to whom data is an input
+    for i, node in enumerate(nodes):
+        if node['op'] != 'null':  # node is an operator
+            input_list = node['inputs']
+            # idx[0] is the position of the input node inside `nodes`.
+            for idx in input_list:
+                # NOTE(review): assumes nodes[0] is the network's 'data'
+                # input -- confirm this holds for every symbol passed in.
+                if idx[0] == 0:  # do not include 'data' node in the op scope
+                    continue
+                if idx[0] in data2op:
+                    # nodes[idx[0]] is an input to op nodes[i]
+                    data2op[idx[0]].append(i)
+                else:
+                    data2op[idx[0]] = [i]
+
+    # In the following, we group data with operators they belong to
+    # by attaching them with operator names as scope names.
+    # The parameters with the operator name as the prefix will be
+    # assigned with the scope name of that operator. For example,
+    # a convolution op has name 'conv', while its weight and bias
+    # have name 'conv_weight' and 'conv_bias'. In the end, the operator
+    # has scope name 'conv' prepended to its name, i.e. 'conv/conv'.
+    # The parameters are named 'conv/conv_weight' and 'conv/conv_bias'.
+    node_defs = []
+    for i, node in enumerate(nodes):
+        node_name = node['name']
+        op_name = node['op']
+        kwargs = {'op': op_name, 'name': node_name}
+        if op_name != 'null':  # node is an operator
+            inputs = []
+            input_list = node['inputs']
+            for idx in input_list:
+                input_node = nodes[idx[0]]
+                input_node_name = input_node['name']
+                if input_node['op'] != 'null':
+                    # the input is another operator: refer to it by its own
+                    # scoped name, '<name>/<name>'
+                    inputs.append(_scoped_name(input_node_name, input_node_name))
+                elif idx[0] in data2op and len(data2op[idx[0]]) == 1 and data2op[idx[0]][0] == i:
+                    # the data node is consumed only by nodes[i], so it is
+                    # placed in the scope of this operator
+                    inputs.append(_scoped_name(node_name, input_node_name))
+                else:  # the data node has no scope name, e.g. 'data' as the input node
+                    inputs.append(input_node_name)
+            kwargs['input'] = inputs
+            kwargs['name'] = _scoped_name(node_name, node_name)
+        elif i in data2op and len(data2op[i]) == 1:
+            # node is a data node belonging to one op, find out which operator this node belongs to
+            op_node_name = nodes[data2op[i][0]]['name']
+            kwargs['name'] = _scoped_name(op_node_name, node_name)
+
+        if 'attrs' in node:
+            # TensorBoard would escape quotation marks, so replace them with spaces
+            attr = json.dumps(node['attrs'], sort_keys=True).replace("\"", ' ')
+            # all attributes are stored as one UTF-8 string under the key 'param'
+            attr = {'param': AttrValue(s=attr.encode(encoding='utf-8'))}
+            kwargs['attr'] = attr
+        node_def = NodeDef(**kwargs)
+        node_defs.append(node_def)
+    return node_defs
+
+
+def _sym2pb(sym):
+    """Converts an MXNet symbol to its graph protobuf definition.
+
+    Returns a `GraphDef` whose nodes come from `_get_nodes_from_symbol(sym)`.
+    """
+    # NOTE(review): the meaning of producer=100 is not visible here -- confirm.
+    return GraphDef(node=_get_nodes_from_symbol(sym), versions=VersionDef(producer=100))
+
+
+def _net2pb(net):
+    """Convert a `HybridBlock` or a `Symbol` to its graph protobuf definition.
+
+    A `HybridBlock` is replaced by `net._cached_graph[1]` before conversion;
+    a `Symbol` is converted directly through `_sym2pb`.
+
+    Raises `RuntimeError` if `net` is a `HybridBlock` whose `_cached_graph` is
+    empty, and `TypeError` if `net` is neither a `HybridBlock` nor a `Symbol`.
+    """
+    if isinstance(net, HybridBlock):
+        # TODO(junwu): may need a more appropriate way to get symbol from a HybridBlock
+        if not net._cached_graph:
+            raise RuntimeError(
+                "Please first call net.hybridize() and then run forward with "
+                "this net at least once before calling add_graph().")
+        # presumably element 1 of the cached graph is the output Symbol --
+        # verify against the MXNet version in use
+        net = net._cached_graph[1]
+    elif not isinstance(net, Symbol):
+        raise TypeError('only accepts mxnet.gluon.HybridBlock and mxnet.symbol.Symbol '
+                        'as input network, received type {}'.format(str(type(net))))
+    return _sym2pb(net)
@@ -6,7 +6,7 @@
 from tornasole.mxnet.mxnet_collection import get_collection_manager
 from tornasole.mxnet.singleton_utils import set_hook
 from tornasole.mxnet.utils import get_reduction_of_data, make_numpy_array
-# from tornasole.mxnet.graph import _net2pb
+from tornasole.mxnet.graph import _net2pb
 
 DEFAULT_INCLUDE_COLLECTIONS = [CollectionKeys.LOSSES]
 
@@ -81,16 +81,15 @@ def log_param(self, param):
                                   tensor_value=param.grad(param.list_ctx()[0]))
 
     def _export_model(self):
-        pass
-        # if self.model is not None:
-        #     try:
-        #         self._get_tb_writer().write_graph(_net2pb(self.model))
-        #     except (RuntimeError, TypeError) as e:
-        #         self.logger.warning(
-        #                 f'Could not export model graph for tensorboard '
-        #                 f'due to the mxnet exception: {e}')
-        # else:
-        #     self.logger.warning('Tornasole does not know the model')
+        """Write the graph of `self.model` through the TensorBoard writer.
+
+        Export is best effort: a `RuntimeError` or `TypeError` raised during
+        the conversion or the write is logged as a warning rather than
+        propagated, and a warning is also logged when `self.model` is None.
+        """
+        if self.model is not None:
+            try:
+                self._get_tb_writer().write_graph(_net2pb(self.model))
+            except (RuntimeError, TypeError) as e:
+                self.logger.warning(
+                        f'Could not export model graph for tensorboard '
+                        f'due to the mxnet exception: {e}')
+        else:
+            self.logger.warning('Tornasole does not know the model')
 
     # This hook is invoked by trainer prior to running the forward pass.
     def forward_pre_hook(self, block, inputs):
@@ -119,7 +118,6 @@ def forward_pre_hook(self, block, inputs):
 
         if self.last_saved_step is not None and not self.exported_collections:
             self.export_collections()
-            self._export_model()
             self.exported_collections = True
 
         self.last_block = block
 
@@ -0,0 +1,50 @@
+#Taken from https://github.com/pytorch/pytorch/blob/c749be9e9f8dd3db8b3582e93f917bd47e8e9e20/torch/utils/tensorboard/_proto_graph.py
+
+from tornasole.core.tfevent.proto.node_def_pb2 import NodeDef
+from tornasole.core.tfevent.proto.attr_value_pb2 import AttrValue
+from tornasole.core.tfevent.proto.tensor_shape_pb2 import TensorShapeProto
+
+
+def attr_value_proto(dtype, shape, s):
+    """Creates a dict of objects matching
+    https://github.com/tensorflow/tensorboard/blob/master/tensorboard/compat/proto/attr_value.proto
+    specifically designed for a NodeDef. The values have been
+    reverse engineered from standard TensorBoard logged data.
+
+    `dtype` is accepted but never read by this function. When `s` is not
+    None it is UTF-8 encoded and stored under the key 'attr'; when `shape` is
+    not None it is stored under '_output_shapes' as a one-element shape list.
+    Returns the resulting dict, which is empty when both are None.
+    """
+    attr = {}
+    if s is not None:
+        attr['attr'] = AttrValue(s=s.encode(encoding='utf_8'))
+    if shape is not None:
+        shapeproto = tensor_shape_proto(shape)
+        attr['_output_shapes'] = AttrValue(list=AttrValue.ListValue(shape=[shapeproto]))
+    return attr
+
+
+def tensor_shape_proto(outputsize):
+    """Creates an object matching
+    https://github.com/tensorflow/tensorboard/blob/master/tensorboard/compat/proto/tensor_shape.proto
+
+    `outputsize` is iterated once; each element becomes the `size` of one
+    `TensorShapeProto.Dim`, in iteration order.
+    """
+    return TensorShapeProto(dim=[TensorShapeProto.Dim(size=d) for d in outputsize])
+
+
+def node_proto(name,
+               op='UnSpecified',
+               input=None,
+               dtype=None,
+               shape=None,  # type: tuple
+               outputsize=None,
+               attributes=''
+               ):
+    """Creates an object matching
+    https://github.com/tensorflow/tensorboard/blob/master/tensorboard/compat/proto/node_def.proto
+
+    `input` may be None (treated as no inputs), a list, or a single value that
+    is wrapped into a one-element list. `name` is UTF-8 encoded before being
+    handed to `NodeDef`. `shape` is accepted but never read here; the output
+    shape attribute is built from `outputsize` instead.
+    """
+    if input is None:
+        input = []
+    if not isinstance(input, list):
+        input = [input]
+    return NodeDef(
+        name=name.encode(encoding='utf_8'),
+        op=op,
+        input=input,
+        # `outputsize`, not `shape`, is forwarded as the shape argument
+        attr=attr_value_proto(dtype, outputsize, attributes)
+    )