# Copyright 2012 Google Inc. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Table formatting library. We define a TableFormatter interface, and create subclasses for several different print formats, including formats intended for both human and machine consumption: Human Consumption ----------------- PrettyFormatter: This prints ASCII-art bordered tables. Inspired by the prettytable python library. Example: +-----+---------------+ | foo | longer header | +-----+---------------+ | a | 3 | | ... | | abc | 123 | +-----+---------------+ SparsePrettyFormatter: This is a PrettyFormatter which simply doesn't print most of the border. Example: foo longer header ----- --------------- a 3 ... abc 123 PrettyJsonFormatter: Prints JSON output in a format easily read by a human. Example: [ { "foo": "a", "longer header": 3 }, ... { "foo": "abc", "longer header": 123 } ] Machine Consumption ------------------- CsvFormatter: Prints output in CSV form, with minimal quoting, '\n' separation between lines, and including a header line. Example: foo,longer header a,3 ... abc,123 JsonFormatter: Prints JSON output in the most compact form possible. Example: [{"foo":"a","longer header":3},...,{"foo":"abc","longer header":123}] Additional formatters can be added by subclassing TableFormatter and overriding the following methods: __len__, __unicode__, AddRow, column_names, AddColumn """ import cStringIO import csv import itertools import json import sys class FormatterException(Exception): pass class TableFormatter(object): """Interface for table formatters.""" def __init__(self, **kwds): """Initializes the base class. Keyword arguments: skip_header_when_empty: If true, does not print the table's header if there are zero rows. This argument has no effect on PrettyJsonFormatter. """ if self.__class__ == TableFormatter: raise NotImplementedError( 'Cannot instantiate abstract class TableFormatter') self.skip_header_when_empty = kwds.get('skip_header_when_empty', False) def __nonzero__(self): return bool(len(self)) def __len__(self): raise NotImplementedError('__len__ must be implemented by subclass') def __str__(self): return unicode(self).encode(sys.getdefaultencoding(), 'backslashreplace') def __unicode__(self): raise NotImplementedError('__unicode__ must be implemented by subclass') def Print(self): if self: # TODO(user): Make encoding a customizable attribute on # the TableFormatter. encoding = sys.stdout.encoding or 'utf8' print unicode(self).encode(encoding, 'backslashreplace') def AddRow(self, row): """Add a new row (an iterable) to this formatter.""" raise NotImplementedError('AddRow must be implemented by subclass') def AddRows(self, rows): """Add all rows to this table.""" for row in rows: self.AddRow(row) def AddField(self, field): """Add a field as a new column to this formatter.""" # TODO(user): Excise this bigquery-specific method. align = 'l' if field.get('type', []) == 'STRING' else 'r' self.AddColumn(field['name'], align=align) def AddFields(self, fields): """Convenience method to add a list of fields.""" for field in fields: self.AddField(field) def AddDict(self, d): """Add a dict as a row by using column names as keys.""" self.AddRow([d.get(name, '') for name in self.column_names]) @property def column_names(self): """Return the ordered list of column names in self.""" raise NotImplementedError('column_names must be implemented by subclass') def AddColumn(self, column_name, align='r', **kwds): """Add a new column to this formatter.""" raise NotImplementedError('AddColumn must be implemented by subclass') def AddColumns(self, column_names, kwdss=None): """Add a series of columns to this formatter.""" kwdss = kwdss or [{}] * len(column_names) for column_name, kwds in zip(column_names, kwdss): self.AddColumn(column_name, **kwds) class PrettyFormatter(TableFormatter): """Formats output as an ASCII-art table with borders.""" def __init__(self, **kwds): """Initialize a new PrettyFormatter. Keyword arguments: junction_char: (default: +) Character to use for table junctions. horizontal_char: (default: -) Character to use for horizontal lines. vertical_char: (default: |) Character to use for vertical lines. """ super(PrettyFormatter, self).__init__(**kwds) self.junction_char = kwds.get('junction_char', '+') self.horizontal_char = kwds.get('horizontal_char', '-') self.vertical_char = kwds.get('vertical_char', '|') self.rows = [] self.row_heights = [] self._column_names = [] self.column_widths = [] self.column_alignments = [] self.header_height = 1 def __len__(self): return len(self.rows) def __unicode__(self): if self or not self.skip_header_when_empty: lines = itertools.chain( self.FormatHeader(), self.FormatRows(), self.FormatHrule()) else: lines = [] return '\n'.join(lines) @staticmethod def CenteredPadding(interval, size, left_justify=True): """Compute information for centering a string in a fixed space. Given two integers interval and size, with size <= interval, this function computes two integers left_padding and right_padding with left_padding + right_padding + size = interval and |left_padding - right_padding| <= 1. In the case that interval and size have different parity, left_padding will be larger iff left_justify is True. (That is, iff the string should be left justified in the "center" space.) Args: interval: Size of the fixed space. size: Size of the string to center in that space. left_justify: (optional, default: True) Whether the string should be left-justified in the center space. Returns: left_padding, right_padding: The size of the left and right margins for centering the string. Raises: FormatterException: If size > interval. """ if size > interval: raise FormatterException('Illegal state in table formatting') same_parity = (interval % 2) == (size % 2) padding = (interval - size) / 2 if same_parity: return padding, padding elif left_justify: return padding, padding + 1 else: return padding + 1, padding @staticmethod def Abbreviate(s, width): """Abbreviate a string to at most width characters.""" suffix = '.' * min(width, 3) return s if len(s) <= width else s[:width - len(suffix)] + suffix @staticmethod def FormatCell(entry, cell_width, cell_height=1, align='c', valign='t'): """Format an entry into a list of strings for a fixed cell size. Given a (possibly multi-line) entry and a cell height and width, we split the entry into a list of lines and format each one into the given width and alignment. We then pad the list with additional blank lines of the appropriate width so that the resulting list has exactly cell_height entries. Each entry is also padded with one space on either side. We abbreviate strings for width, but we require that the number of lines in entry is at most cell_height. Args: entry: String to format, which may have newlines. cell_width: Maximum width for lines in the cell. cell_height: Number of lines in the cell. align: Alignment to use for lines of text. valign: Vertical alignment in the cell. One of 't', 'c', or 'b' (top, center, and bottom, respectively). Returns: An iterator yielding exactly cell_height lines, each of exact width cell_width + 2, corresponding to this cell. Raises: FormatterException: If there are too many lines in entry. ValueError: If the valign is invalid. """ entry_lines = [PrettyFormatter.Abbreviate(line, cell_width) for line in entry.split('\n')] if len(entry_lines) > cell_height: raise FormatterException('Too many lines (%s) for a cell of size %s' % ( len(entry_lines), cell_height)) if valign == 't': top_lines = [] bottom_lines = itertools.repeat(' ' * (cell_width + 2), cell_height - len(entry_lines)) elif valign == 'c': top_padding, bottom_padding = PrettyFormatter.CenteredPadding( cell_height, len(entry_lines)) top_lines = itertools.repeat(' ' * (cell_width + 2), top_padding) bottom_lines = itertools.repeat(' ' * (cell_width + 2), bottom_padding) elif valign == 'b': bottom_lines = [] top_lines = itertools.repeat(' ' * (cell_width + 2), cell_height - len(entry_lines)) else: raise ValueError('Unknown value for valign: %s' % (valign,)) content_lines = [] for line in entry_lines: if align == 'c': left_padding, right_padding = PrettyFormatter.CenteredPadding( cell_width, len(line)) content_lines.append(' %s%s%s ' % ( ' ' * left_padding, line, ' ' * right_padding)) elif align in ('l', 'r'): fmt = ' %*s ' if align == 'r' else ' %-*s ' content_lines.append(fmt % (cell_width, line)) else: raise FormatterException('Unknown alignment: %s' % (align,)) return itertools.chain(top_lines, content_lines, bottom_lines) def FormatRow(self, entries, row_height, column_alignments=None, column_widths=None): """Format a row into a list of strings. Given a list of entries, which must be the same length as the number of columns in this table, and a desired row height, we generate a list of strings corresponding to the printed representation of that row. Args: entries: List of entries to format. row_height: Number of printed lines corresponding to this row. column_alignments: (optional, default self.column_alignments) The alignment to use for each column. column_widths: (optional, default self.column_widths) The widths of each column. Returns: An iterator over the strings in the printed representation of this row. """ column_alignments = column_alignments or self.column_alignments column_widths = column_widths or self.column_widths # pylint:disable-msg=C6402 curried_format = lambda entry, width, align: self.__class__.FormatCell( unicode(entry), width, cell_height=row_height, align=align) printed_rows = itertools.izip(*itertools.imap( curried_format, entries, column_widths, column_alignments)) return (self.vertical_char.join(itertools.chain([''], cells, [''])) for cells in printed_rows) def HeaderLines(self): """Return an iterator over the row(s) for the column names.""" aligns = itertools.repeat('c') return self.FormatRow(self.column_names, self.header_height, column_alignments=aligns) def FormatHrule(self): """Return a list containing an hrule for this table.""" entries = (''.join(itertools.repeat('-', width + 2)) for width in self.column_widths) return [self.junction_char.join(itertools.chain([''], entries, ['']))] def FormatHeader(self): """Return an iterator over the lines for the header of this table.""" return itertools.chain( self.FormatHrule(), self.HeaderLines(), self.FormatHrule()) def FormatRows(self): """Return an iterator over all the rows in this table.""" return itertools.chain(*itertools.imap( self.FormatRow, self.rows, self.row_heights)) def AddRow(self, row): """Add a row to this table. Args: row: A list of length equal to the number of columns in this table. Raises: FormatterException: If the row length is invalid. """ if len(row) != len(self.column_names): raise FormatterException('Invalid row length: %s' % (len(row),)) split_rows = [unicode(entry).split('\n') for entry in row] self.row_heights.append(max(len(lines) for lines in split_rows)) column_widths = (max(len(line) for line in entry) for entry in split_rows) self.column_widths = [max(width, current) for width, current in itertools.izip(column_widths, self.column_widths)] self.rows.append(row) def AddColumn(self, column_name, align='l', **kwds): """Add a column to this table. Args: column_name: Name for the new column. align: (optional, default: 'l') Alignment for the new column entries. Raises: FormatterException: If the table already has any rows, or if the provided alignment is invalid. """ if self: raise FormatterException( 'Cannot add a new column to an initialized table') if align not in ('l', 'c', 'r'): raise FormatterException('Invalid column alignment: %s' % (align,)) lines = column_name.split('\n') self.column_widths.append(max(len(line) for line in lines)) self.column_alignments.append(align) self.column_names.append(column_name) self.header_height = max(len(lines), self.header_height) @property def column_names(self): return self._column_names class SparsePrettyFormatter(PrettyFormatter): """Formats output as a table with a header and separator line.""" def __init__(self, **kwds): """Initialize a new SparsePrettyFormatter.""" default_kwds = {'junction_char': ' ', 'vertical_char': ' '} default_kwds.update(kwds) super(SparsePrettyFormatter, self).__init__(**default_kwds) def __unicode__(self): if self or not self.skip_header_when_empty: lines = itertools.chain(self.FormatHeader(), self.FormatRows()) else: lines = [] return '\n'.join(lines) def FormatHeader(self): """Return an iterator over the header lines for this table.""" return itertools.chain(self.HeaderLines(), self.FormatHrule()) class CsvFormatter(TableFormatter): """Formats output as CSV with header lines. The resulting CSV file includes a header line, uses Unix-style newlines, and only quotes those entries which require it, namely those that contain quotes, newlines, or commas. """ def __init__(self, **kwds): super(CsvFormatter, self).__init__(**kwds) self._buffer = cStringIO.StringIO() self._header = [] self._table = csv.writer( self._buffer, quoting=csv.QUOTE_MINIMAL, lineterminator='\n') def __nonzero__(self): return bool(self._buffer.tell()) def __len__(self): return len(unicode(self).splitlines()) def __unicode__(self): if self or not self.skip_header_when_empty: lines = [','.join(self._header), self._buffer.getvalue()] else: lines = [] # Note that we need to explicitly decode here to work around # the fact that the CSV module does not work with unicode. return '\n'.join(line.decode('utf8') for line in lines).rstrip() @property def column_names(self): return self._header[:] def AddColumn(self, column_name, **kwds): if self: raise FormatterException( 'Cannot add a new column to an initialized table') self._header.append(column_name) def AddRow(self, row): self._table.writerow([unicode(entry).encode('utf8', 'backslashreplace') for entry in row]) class JsonFormatter(TableFormatter): """Formats output in maximally compact JSON.""" def __init__(self, **kwds): super(JsonFormatter, self).__init__(**kwds) self._field_names = [] self._table = [] def __len__(self): return len(self._table) def __unicode__(self): return json.dumps(self._table, separators=(',', ':'), ensure_ascii=False) @property def column_names(self): return self._field_names[:] def AddColumn(self, column_name, **kwds): if self: raise FormatterException( 'Cannot add a new column to an initialized table') self._field_names.append(column_name) def AddRow(self, row): if len(row) != len(self._field_names): raise FormatterException('Invalid row: %s' % (row,)) self._table.append(dict(zip(self._field_names, row))) class PrettyJsonFormatter(JsonFormatter): """Formats output in human-legible JSON.""" def __unicode__(self): return json.dumps(self._table, sort_keys=True, indent=2, ensure_ascii=False) class NullFormatter(TableFormatter): """Formatter that prints no output at all.""" def __init__(self, **kwds): super(NullFormatter, self).__init__(**kwds) self._column_names = [] self._rows = [] def __nonzero__(self): return bool(self._rows) def __len__(self): return len(self._rows) def __unicode__(self): return '' def AddRow(self, row): self._rows.append(row) def AddRows(self, rows): for row in rows: self.AddRow(row) @property def column_names(self): return self._column_names[:] def AddColumn(self, column_name, **kwds): self._column_names.append(column_name) def GetFormatter(table_format): """Map a format name to a TableFormatter object.""" if table_format == 'csv': table_formatter = CsvFormatter() elif table_format == 'pretty': table_formatter = PrettyFormatter() elif table_format == 'json': table_formatter = JsonFormatter() elif table_format == 'prettyjson': table_formatter = PrettyJsonFormatter() elif table_format == 'sparse': table_formatter = SparsePrettyFormatter() elif table_format == 'none': table_formatter = NullFormatter() else: raise FormatterException('Unknown format: %s' % table_format) return table_formatter