diff --git a/pandas/core/common.py b/pandas/core/common.py
index d63029b447705..87308b20377b3 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -1173,7 +1173,7 @@ def in_interactive_session():
     returns True if running under python/ipython interactive shell
     """
     import __main__ as main
-    return not hasattr(main, '__file__')
+    return not hasattr(main, '__file__') or get_option('test.interactive')
 
 def in_qtconsole():
     """
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index a6739a1c450e9..e38d75a6e0a7b 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -77,6 +77,20 @@
     these are generally strings meant to be displayed on the console.
 """
 
+pc_expand_repr_doc="""
+: boolean
+    Default False
+    Whether to print out the full DataFrame repr for wide DataFrames
+    across multiple lines.
+    If False, the summary representation is shown.
+"""
+
+pc_line_width_doc="""
+: int
+    Default 80
+    When printing wide DataFrames, this is the width of each line.
+"""
+
 with cf.config_prefix('print'):
     cf.register_option('precision', 7, pc_precision_doc, validator=is_int)
     cf.register_option('digits', 7, validator=is_int)
@@ -99,3 +113,15 @@
                        validator=is_bool)
     cf.register_option('encoding', detect_console_encoding(), pc_encoding_doc,
                        validator=is_text)
+    cf.register_option('expand_frame_repr', False, pc_expand_repr_doc,
+                       validator=is_bool)
+    cf.register_option('line_width', 80, pc_line_width_doc, validator=is_int)
+
+tc_interactive_doc="""
+: boolean
+    Default False
+    Whether to simulate interactive mode for purposes of testing
+"""
+with cf.config_prefix('test'):
+    cf.register_option('interactive', False, tc_interactive_doc,
+                       validator=is_bool)
diff --git a/pandas/core/format.py b/pandas/core/format.py
index 554ac41d7bf97..97aa23191adaa 100644
--- a/pandas/core/format.py
+++ b/pandas/core/format.py
@@ -186,7 +186,7 @@ class DataFrameFormatter(object):
     def __init__(self, frame, buf=None, columns=None, col_space=None,
                  header=True, index=True, na_rep='NaN', formatters=None,
                  justify=None, float_format=None, sparsify=None,
-                 index_names=True, **kwds):
+                 index_names=True, line_width=None, **kwds):
         self.frame = frame
         self.buf = buf if buf is not None else StringIO()
         self.show_index_names = index_names
@@ -202,6 +202,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
         self.col_space = col_space
         self.header = header
         self.index = index
+        self.line_width = line_width
 
         if justify is None:
             self.justify = get_option("print.colheader_justify")
@@ -282,10 +283,43 @@ def to_string(self, force_unicode=None):
             text = info_line
         else:
             strcols = self._to_str_columns()
-            text = adjoin(1, *strcols)
+            if self.line_width is None:
+                text = adjoin(1, *strcols)
+            else:
+                text = self._join_multiline(*strcols)
 
         self.buf.writelines(text)
 
+    def _join_multiline(self, *strcols):
+        """
+        Join the formatted columns into several blocks, each of which fits
+        within self.line_width, separated by a blank line.
+        """
+        lwidth = self.line_width
+        strcols = list(strcols)
+        if self.index:
+            idx = strcols.pop(0)
+            lwidth -= np.array([len(x) for x in idx]).max()
+
+        col_widths = [np.array([len(x) for x in col]).max()
+                      if len(col) > 0 else 0
+                      for col in strcols]
+        col_bins = _binify(col_widths, lwidth)
+
+        str_lst = []
+        st = 0
+        for ed in col_bins:
+            row = strcols[st:ed]
+            if self.index:
+                # idx is only defined when the index is being rendered
+                row.insert(0, idx)
+            if ed < len(strcols):
+                # mark every block but the last one as continued
+                row.append([' \\'] + [' '] * (len(self.frame) - 1))
+            str_lst.append(adjoin(1, *row))
+            st = ed
+        return '\n\n'.join(str_lst)
+
     def to_latex(self, force_unicode=None, column_format=None):
         """
         Render a DataFrame to a LaTeX tabular environment output.
@@ -1376,6 +1410,24 @@ def _put_lines(buf, lines):
         lines = [unicode(x) for x in lines]
     buf.write('\n'.join(lines))
 
+def _binify(cols, width):
+    """
+    Split column widths into consecutive groups fitting the given width.
+
+    Returns the list of exclusive end positions of the groups; for non-empty
+    input the last entry is always len(cols) so that no column is left out.
+    """
+    bins = []
+    curr_width = 0
+    for i, w in enumerate(cols):
+        curr_width += w
+        # a break before the first column would yield an empty group
+        if curr_width + 2 > width and i > 0:
+            bins.append(i)
+            curr_width = w
+    if len(cols) > 0:
+        bins.append(len(cols))
+    return bins
 
 if __name__ == '__main__':
     arr = np.array([746.03, 0.00, 5620.00, 1592.36])
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 1a5f582ebc142..c61c721bf07ef 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -592,10 +592,11 @@ def _need_info_repr_(self):
         max_rows = (terminal_height if get_option("print.max_rows") == 0
                     else get_option("print.max_rows"))
         max_columns = get_option("print.max_columns")
+        expand_repr = get_option("print.expand_frame_repr")
 
         if max_columns > 0:
             if len(self.index) <= max_rows and \
-               len(self.columns) <= max_columns:
+               (len(self.columns) <= max_columns or expand_repr):
                 return False
             else:
                 return True
@@ -603,14 +604,16 @@ def _need_info_repr_(self):
             # save us
             if (len(self.index) > max_rows or
                 (com.in_interactive_session() and
-                 len(self.columns) > terminal_width // 2)):
+                 len(self.columns) > terminal_width // 2 and
+                 not expand_repr)):
                 return True
             else:
                 buf = StringIO()
                 self.to_string(buf=buf)
                 value = buf.getvalue()
-                if (max([len(l) for l in value.split('\n')]) > terminal_width and
-                    com.in_interactive_session()):
+                if (max([len(l) for l in value.split('\n')]) > terminal_width
+                    and com.in_interactive_session()
+                    and not expand_repr):
                     return True
                 else:
                     return False
@@ -646,13 +649,49 @@ def __unicode__(self):
         if self._need_info_repr_():
             self.info(buf=buf, verbose=self._verbose_info)
         else:
-            self.to_string(buf=buf)
+            is_wide = self._need_wide_repr()
+            line_width = None
+            if is_wide:
+                line_width = get_option('print.line_width')
+            self.to_string(buf=buf, line_width=line_width)
 
         value = buf.getvalue()
         assert type(value) == unicode
 
         return value
 
+    def _need_wide_repr(self):
+        """
+        Check whether the frame should be printed in blocks of columns
+        no wider than the print.line_width option.
+        """
+        if com.in_qtconsole():
+            terminal_width, terminal_height = 100, 100
+        else:
+            terminal_width, terminal_height = get_terminal_size()
+        max_columns = get_option("print.max_columns")
+        expand_repr = get_option("print.expand_frame_repr")
+
+        if max_columns > 0:
+            if len(self.columns) > max_columns and expand_repr:
+                return True
+        else:
+            # save us
+            if (com.in_interactive_session() and
+                len(self.columns) > terminal_width // 2 and
+                expand_repr):
+                return True
+            else:
+                buf = StringIO()
+                self.to_string(buf=buf)
+                value = buf.getvalue()
+                if (max([len(l) for l in value.split('\n')]) > terminal_width
+                    and com.in_interactive_session()
+                    and expand_repr):
+                    return True
+
+        return False
+
     def __repr__(self):
         """
         Return a string representation for a particular DataFrame
@@ -1450,7 +1489,8 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
     def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
                   header=True, index=True, na_rep='NaN', formatters=None,
                   float_format=None, sparsify=None, nanRep=None,
-                  index_names=True, justify=None, force_unicode=None):
+                  index_names=True, justify=None, force_unicode=None,
+                  line_width=None):
         """
         Render a DataFrame to a console-friendly tabular output.
         """
@@ -1476,7 +1516,8 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
                                            sparsify=sparsify,
                                            justify=justify,
                                            index_names=index_names,
-                                           header=header, index=index)
+                                           header=header, index=index,
+                                           line_width=line_width)
         formatter.to_string()
 
         if buf is None:
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
index 54ad50573b216..e85eaefc7867a 100644
--- a/pandas/tests/test_format.py
+++ b/pandas/tests/test_format.py
@@ -400,6 +400,116 @@ def test_frame_info_encoding(self):
         repr(df.T)
         fmt.set_printoptions(max_rows=200)
 
+    def test_wide_repr(self):
+        set_option('test.interactive', True)
+        col = lambda l, k: [tm.rands(k) for _ in xrange(l)]
+        df = DataFrame([col(20, 25) for _ in range(10)])
+        rep_str = repr(df)
+        set_option('print.expand_frame_repr', True)
+        wide_repr = repr(df)
+        self.assert_(rep_str != wide_repr)
+
+        set_option('print.line_width', 120)
+        wider_repr = repr(df)
+        self.assert_(len(wider_repr) < len(wide_repr))
+
+        set_option('print.expand_frame_repr', False)
+        set_option('test.interactive', False)
+        set_option('print.line_width', 80)
+
+    def test_wide_repr_named(self):
+        set_option('test.interactive', True)
+        col = lambda l, k: [tm.rands(k) for _ in xrange(l)]
+        df = DataFrame([col(20, 25) for _ in range(10)])
+        df.index.name = 'DataFrame Index'
+        rep_str = repr(df)
+        set_option('print.expand_frame_repr', True)
+        wide_repr = repr(df)
+        self.assert_(rep_str != wide_repr)
+
+        set_option('print.line_width', 120)
+        wider_repr = repr(df)
+        self.assert_(len(wider_repr) < len(wide_repr))
+
+        for line in wide_repr.splitlines()[1::13]:
+            self.assert_('DataFrame Index' in line)
+
+        set_option('print.expand_frame_repr', False)
+        set_option('test.interactive', False)
+        set_option('print.line_width', 80)
+
+    def test_wide_repr_multiindex(self):
+        set_option('test.interactive', True)
+        col = lambda l, k: [tm.rands(k) for _ in xrange(l)]
+        midx = pandas.MultiIndex.from_arrays([np.array(col(10, 5)),
+                                              np.array(col(10, 5))])
+        df = DataFrame([col(20, 25) for _ in range(10)],
+                       index=midx)
+        df.index.names = ['Level 0', 'Level 1']
+        rep_str = repr(df)
+        set_option('print.expand_frame_repr', True)
+        wide_repr = repr(df)
+        self.assert_(rep_str != wide_repr)
+
+        set_option('print.line_width', 120)
+        wider_repr = repr(df)
+        self.assert_(len(wider_repr) < len(wide_repr))
+
+        for line in wide_repr.splitlines()[1::13]:
+            self.assert_('Level 0 Level 1' in line)
+
+        set_option('print.expand_frame_repr', False)
+        set_option('test.interactive', False)
+        set_option('print.line_width', 80)
+
+    def test_wide_repr_multiindex_cols(self):
+        set_option('test.interactive', True)
+        col = lambda l, k: [tm.rands(k) for _ in xrange(l)]
+        midx = pandas.MultiIndex.from_arrays([np.array(col(10, 5)),
+                                              np.array(col(10, 5))])
+        mcols = pandas.MultiIndex.from_arrays([np.array(col(20, 3)),
+                                               np.array(col(20, 3))])
+        df = DataFrame([col(20, 25) for _ in range(10)],
+                       index=midx, columns=mcols)
+        df.index.names = ['Level 0', 'Level 1']
+        rep_str = repr(df)
+        set_option('print.expand_frame_repr', True)
+        wide_repr = repr(df)
+        self.assert_(rep_str != wide_repr)
+
+        set_option('print.line_width', 120)
+        wider_repr = repr(df)
+        self.assert_(len(wider_repr) < len(wide_repr))
+
+        self.assert_(len(wide_repr.splitlines()) == 14 * 10 - 1)
+
+        set_option('print.expand_frame_repr', False)
+        set_option('test.interactive', False)
+        set_option('print.line_width', 80)
+
+    def test_wide_repr_unicode(self):
+        set_option('test.interactive', True)
+        col = lambda l, k: [tm.randu(k) for _ in xrange(l)]
+        df = DataFrame([col(20, 25) for _ in range(10)])
+        rep_str = repr(df)
+        set_option('print.expand_frame_repr', True)
+        wide_repr = repr(df)
+        self.assert_(rep_str != wide_repr)
+
+        set_option('print.line_width', 120)
+        wider_repr = repr(df)
+        self.assert_(len(wider_repr) < len(wide_repr))
+
+        set_option('print.expand_frame_repr', False)
+        set_option('test.interactive', False)
+        set_option('print.line_width', 80)
+
+    def test_binify(self):
+        # the trailing column must always land in a bin
+        self.assertEqual(fmt._binify([25, 25, 25], 52), [2, 3])
+        # an over-wide first column must not create an empty leading bin
+        self.assertEqual(fmt._binify([100, 5], 52), [1, 2])
+
     def test_to_string(self):
         from pandas import read_table
         import re