build: refactor table builder

Sam Kleinman · Sam Kleinman · commit 20fa680fdc4e · 2012-12-20T20:59:49.000-05:00
diff --git a/bin/table_builder.py b/bin/table_builder.py
@@ -19,76 +19,138 @@
 except ImportError:
     exit('[table-builder]: You must install PyYAML to build tables.')
 
-# global variables
-columnwidths=[]  # the column widths to use in get_rowline() and get_row() methods
-tempcolumnwidths=[] # working list to determine columnwidths global variable
-                    # tempcolumnwidths keeps the running max value for each cell
-
-###################################
-#
-# Generating parts of the table.
-
-def get_row_line(delim='-'):
-    """
-    Prints lines to seperate rows in the column for restrucutred text
-    format.
-    """
-    return '+' + delim + str(delim + '+' + delim).join([ delim * width for width in columnwidths ]) + delim + '+'
-
-def get_row(rowdata):
-    """
-    Prints the contents of the row, seperated as needed for
-    restructured text format.
-    """
-    rowlines = []
-    for line in zip(*rowdata):
-        if len(rowlines) > 0:
-            rowlines.append('\n')
-        rowlines.append( '| ' + ' | '.join([ line[idx].ljust(columnwidths[idx]) for idx in range(len(line)) ]) + ' |' )
-
-    return ''.join(rowlines)
-
-###################################
-#
-# Flexible table building processing.
-
 def normalize_cell_height(rowdata):
     """
     Normalize cells in the rowdata so that each cell has the same height;
     more specifically, ensure that each cell data list spans the same number of
-    lines when printed as others in the row.
+    lines when printed as others in the row. Mutates 'rowdata' in place by
+    appending to its cells rather than rebinding the reference.
     """
-
-    # This function mutates the rowdata object, which is passed by
-    # reference..  By appending to rowdata content as opposed to
-    # resetting the reference (i.e. setting it equal to something
-    # else) the method modifies whatever was referenced by rowdata and
-    # does not need to explicitly return something
-
     maxlines = max([ len(cell) for cell in rowdata])
 
     for cell in rowdata:
-        for x in range(maxlines-len(cell)):
+        for x in range(maxlines - len(cell)):
             cell.append( ' '  )
 
-def check_column_width(rowdata):
-    """
-    Compares the cell widths of the row with the curren max cell width
-    in the global variables. Then updates the global values in the
-    variable ``tempcolumnwidths``. ``tempcolumnwidths``  variable maintains
-    the running max width of each column.
+def get_default_outputfile(inputfile):
+    return inputfile.rsplit('.')[0] + '.rst'
 
-    By appending to the variable, the global
-    scope of tempcolumnwidths is maintained.
-    """
 
-    thisrowwidths = [ max([len(line) for line in cell ]) for cell in rowdata]
+###################################
+#
+# Generating parts of the table.
 
-    if len( tempcolumnwidths ) == 0:
-        tempcolumnwidths.append(thisrowwidths)
-    else:
-        currentmaxwidths = tempcolumnwidths.pop()
-        tempcolumnwidths.append([ max(currentmaxwidths[i], thisrowwidths[i]) for i in range(len(currentmaxwidths))])
+class RstTable(object):
+    def __init__(self, inputfile):
+        self.columnwidths = []
+        self.tempcolumnwidths = []
+        self.read_data(inputfile)
+        self.process_table_content()
+
+    def read_data(self, datafile):
+        with open(datafile, "rt") as f:
+            parsed = yaml.load_all(f)
+            layout = dict2obj(parsed.next())
+            meta = dict2obj(parsed.next())
+            content = dict2obj(parsed.next())
+
+        if layout.section != 'layout':
+            exit('layout document in "' + datafile + '" is malformed.')
+        elif meta.section != 'meta':
+            exit('meta document in "' + datafile + '" is malformed.')
+        elif content.section != 'content':
+            exit('content document in "' + datafile + '" is malformed.')
+
+        rows = { 'rows': [] }
+
+        if layout.header:
+            header = []
+            for cell in layout.header:
+                header.append([eval(cell)])
+        else:
+            header = None
+
+        for rownum in layout.rows:
+            parsed_cell = []
+            for cell in rownum.items()[0][1]:
+                parsed_cell.append(eval(cell))
+            rows['rows'].append( dict(zip(rownum.keys(), [parsed_cell])) )
+
+        # return header, rows
+        self.header = header
+        self.rows = rows
+
+    ###################################
+    #
+    # Flexibility for tables of different sizes.
+
+    def check_column_width(self, rowdata):
+        """
+        Compares the cell widths of the row with the current max cell
+        widths stored on the instance, then updates
+        ``self.tempcolumnwidths``, which maintains the running max
+        width of each column.
+        """
+
+        thisrowwidths = [ max([len(line) for line in cell ]) for cell in rowdata]
+
+        if len(self.tempcolumnwidths) == 0:
+            self.tempcolumnwidths.append(thisrowwidths)
+        else:
+            currentmaxwidths = self.tempcolumnwidths.pop()
+            self.tempcolumnwidths.append([ max(currentmaxwidths[i], thisrowwidths[i]) for i in range(len(currentmaxwidths))])
+
+
+    ###################################
+    #
+    # Building the table representation
+
+    def get_row_line(self, delim='-'):
+        """
+        Produces and returns row delimiters for restructured text tables.
+        """
+        return '+' + delim + str(delim + '+' + delim).join([ delim * width for width in self.columnwidths ]) + delim + '+'
+
+    def get_row(self, rowdata):
+        """
+        Returns rows given ``rowdata`` properly formatted for restructured text tables.
+        """
+        rowlines = []
+        for line in zip(*rowdata):
+            if len(rowlines) > 0:
+                rowlines.append('\n')
+            rowlines.append( '| ' + ' | '.join([ line[idx].ljust(self.columnwidths[idx]) for idx in range(len(line)) ]) + ' |' )
+
+        return ''.join(rowlines)
+
+    def process_table_content(self):
+        self.tabledata = []
+
+        # Compare cell widths of the header with the max cell widths
+        # stored in self.tempcolumnwidths and swap out value(s) if
+        # necessary.
+        if self.header is not None:
+            self.check_column_width(self.header)
+
+        for index in range(len(self.rows['rows'])):
+            parsed_row = []
+
+            # Append each cell to the parsed_row list, breaking multi-line
+            # cell data as needed.
+            for cell in self.rows['rows'][index][index + 1]:
+                parsed_row.append(cell.split('\n'))
+
+            # process the data to ensure the table is big enough.
+            self.check_column_width(parsed_row)
+            normalize_cell_height(parsed_row)
+
+            # add the processed data to the table
+            self.tabledata.append(parsed_row)
+
+        # Set self.columnwidths to the flattened out
+        # self.tempcolumnwidths
+        for cellwidth in self.tempcolumnwidths.pop():
+            self.columnwidths.append(cellwidth)
 
 ###################################
 #
@@ -109,114 +171,31 @@ def __getattr__(self, key):
 
         return value
 
-def check_input_data(datafile, layout, meta, content):
-    if layout != 'layout':
-        exit('layout document in "' + datafile + '" is malformed.')
-    elif meta != 'meta':
-        exit('meta document in "' + datafile + '" is malformed.')
-    elif content != 'content':
-        exit('content document in "' + datafile + '" is malformed.')
-
-def read_data(datafile):
-    with open(datafile, "rt") as f:
-        parsed = yaml.load_all(f)
-        layout = dict2obj(parsed.next())
-        meta = dict2obj(parsed.next())
-        content = dict2obj(parsed.next())
-
-    check_input_data(datafile, layout.section, meta.section, content.section)
-
-    rows = { 'rows': [] }
-
-    if layout.header:
-        header = []
-        for cell in layout.header:
-            header.append([eval(cell)])
-    else:
-        header = None
-
-    for rownum in layout.rows:
-        parsed_cell = []
-        for cell in rownum.items()[0][1]:
-            parsed_cell.append(eval(cell))
-        rows['rows'].append( dict(zip(rownum.keys(), [parsed_cell])) )
-
-    return header, rows
-
-def render_table(header, rows):
-    tabledata = []
-
-    # Compare cell widths of the header  with the
-    # max cell widths stored in the global var tempcolumnwidths
-    # and swap out value(s) if necessary.
-    if header is not None:
-        check_column_width(header)
-
-    for index in range(len(rows['rows'])):
-        parsed_row = []
-
-        # Append each cell to the parsed_row list, breaking multi-line
-        # cell data as needed.
-        for cell in rows['rows'][index][index + 1]:
-            parsed_row.append(cell.split('\n'))
-
-        # process the data to ensure the table is big enough.
-        check_column_width(parsed_row)
-        normalize_cell_height(parsed_row)
-
-        # add the processed data to the table
-        tabledata.append(parsed_row)
-
-    # Set the global variable columnwidths to the flattened out
-    # tempcolumnwidths
-    for cellwidth in tempcolumnwidths.pop():
-        columnwidths.append(cellwidth)
-
-    output = []
-    output.append(get_row_line())
-
-    if header is not None:
-        output.append(get_row(header))
-        output.append(get_row_line('='))
-
-    for row in tabledata:
-        output.append(get_row(row))
-        output.append(get_row_line())
-
-    return output
-
 ###################################
 #
 # Interaction
 
-def get_default_outputfile(inputfile):
-    return  inputfile.rsplit('.')[0] + '.rst'
+class YamlTableBuilder(RstTable):
+    def __init__(self, inputfile):
+        self.inputfile = inputfile
+        super(YamlTableBuilder, self).__init__(inputfile)
+        self.output = self.render_table()
 
-def cli():
-    # this is a total hack to avoid argparse. first argument is input,
-    # second is output.  we'll have to break down and use argparse if
-    # we want any other options, just for sanity.
+    def render_table(self):
+        o = []
+        o.append(self.get_row_line())
 
-    inputfile = sys.argv[1]
+        if self.header is not None:
+            o.append(self.get_row(self.header))
+            o.append(self.get_row_line('='))
 
-    try:
-        outputfile = sys.argv[2]
-    except IndexError:
-        outputfile = get_default_outputfile(inputfile)
+        for self.row in self.tabledata:
+            o.append(self.get_row(self.row))
+            o.append(self.get_row_line())
 
-    return inputfile, outputfile
+        return o
 
-###################################
-#
-# Interfaces.
-
-class YamlTableBuilder(object):
-    def __init__(self, inputfile):
-        self.inputfile = inputfile
-        self.table_header, self.table_rows = read_data(inputfile)
-        self.output = render_table(self.table_header, self.table_rows)
-
-    def write_file(self, outputfile=None):
+    def write(self, outputfile=None):
         if outputfile is None:
             outputfile = get_default_outputfile(self.inputfile)
 
@@ -228,12 +207,24 @@ def print_table(self):
         for line in self.output:
             print(line)
 
+###################################
+#
+# Interface.
+
 def main():
-    inputfile, outputfile = cli()
+    # the following is a total hack to avoid argparse. first argument
+    # is input, second is output.  we'll have to break down and use
+    # argparse if we want any other options, just for sanity.
 
-    table = YamlTableBuilder(inputfile)
+    inputfile = sys.argv[1]
+
+    try:
+        outputfile = sys.argv[2]
+    except IndexError:
+        outputfile = get_default_outputfile(inputfile)
 
-    table.write_file(outputfile)
+    table = YamlTableBuilder(inputfile)
+    table.write(outputfile)
 
 if __name__ == '__main__':
     main()