Merge PR #2436, wide DataFrame repr

wesm · wesm · commit cde969003fed · 2012-12-07T17:37:44.000-05:00
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -1179,7 +1179,7 @@ def in_interactive_session():
     returns True if running under python/ipython interactive shell
     """
     import __main__ as main
-    return not hasattr(main, '__file__')
+    return not hasattr(main, '__file__') or get_option('test.interactive')
 
 def in_qtconsole():
     """
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
@@ -101,6 +101,20 @@
     Controls the justification of column headers. used by DataFrameFormatter.
 """
 
+pc_expand_repr_doc="""
+: boolean
+    Default False
+    Whether to print out the full DataFrame repr for wide DataFrames
+    across multiple lines.
+    If False, the summary representation is shown.
+"""
+
+pc_line_width_doc="""
+: int
+    Default 80
+    When printing wide DataFrames, this is the width of each line.
+"""
+
 with cf.config_prefix('print'):
     cf.register_option('precision', 7, pc_precision_doc, validator=is_int)
     cf.register_option('float_format', None, float_format_doc)
@@ -122,3 +136,16 @@
                        validator=is_bool)
     cf.register_option('encoding', detect_console_encoding(), pc_encoding_doc,
                     validator=is_text)
+    cf.register_option('expand_frame_repr', False, pc_expand_repr_doc,
+                       validator=is_bool)
+    cf.register_option('line_width', 80, pc_line_width_doc, validator=is_int)
+
+tc_interactive_doc="""
+: boolean
+    Default False
+    Whether to simulate interactive mode for purposes of testing
+"""
+# test-only switch; read by com.in_interactive_session()
+with cf.config_prefix('test'):
+    cf.register_option('interactive', False, tc_interactive_doc,
+                       validator=is_bool)
diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -186,7 +186,7 @@ class DataFrameFormatter(object):
     def __init__(self, frame, buf=None, columns=None, col_space=None,
                  header=True, index=True, na_rep='NaN', formatters=None,
                  justify=None, float_format=None, sparsify=None,
-                 index_names=True, **kwds):
+                 index_names=True, line_width=None, **kwds):
         self.frame = frame
         self.buf = buf if buf is not None else StringIO()
         self.show_index_names = index_names
@@ -202,6 +202,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
         self.col_space = col_space
         self.header = header
         self.index = index
+        self.line_width = line_width
 
         if justify is None:
             self.justify = get_option("print.colheader_justify")
@@ -282,10 +283,36 @@ def to_string(self, force_unicode=None):
             text = info_line
         else:
             strcols = self._to_str_columns()
-            text = adjoin(1, *strcols)
+            if self.line_width is None:
+                text = adjoin(1, *strcols)
+            else:
+                text = self._join_multiline(*strcols)
 
         self.buf.writelines(text)
 
+    def _join_multiline(self, *strcols):
+        """
+        Wrap the formatted columns into blocks sized by _binify against
+        line_width, repeating the index column (if shown) in every block.
+        """
+        lwidth = self.line_width
+        strcols = list(strcols)
+        idx = None  # stays None for index=False; no index column to repeat
+        if self.index:
+            idx = strcols.pop(0)
+            lwidth -= max(len(x) for x in idx)
+        col_widths = [max(len(x) for x in col) if len(col) > 0 else 0
+                      for col in strcols]
+        str_lst = []
+        st = 0
+        for ed in _binify(col_widths, lwidth):
+            row = strcols[st:ed] if idx is None else [idx] + strcols[st:ed]
+            if ed < len(strcols):  # continuation mark, except on last block
+                row.append([' \\'] + ['  '] * (len(self.frame) - 1))
+            str_lst.append(adjoin(1, *row))
+            st = ed
+        return '\n\n'.join(str_lst)
+
     def to_latex(self, force_unicode=None, column_format=None):
         """
         Render a DataFrame to a LaTeX tabular environment output.
@@ -1376,6 +1403,17 @@ def _put_lines(buf, lines):
         lines = [unicode(x) for x in lines]
     buf.write('\n'.join(lines))
 
+def _binify(cols, width):
+    # Split column widths into runs that fit in `width`; return each run's
+    # exclusive end index.  Always ends with len(cols) so no column is lost.
+    bins = []
+    curr_width = 0
+    for i, w in enumerate(cols):
+        curr_width += w
+        if i > 0 and curr_width + 2 > width:  # never emit an empty first bin
+            bins.append(i)
+            curr_width = w
+    return bins + [len(cols)] if cols else bins
 
 if __name__ == '__main__':
     arr = np.array([746.03, 0.00, 5620.00, 1592.36])
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -592,25 +592,28 @@ def _need_info_repr_(self):
         max_rows = (terminal_height if get_option("print.max_rows") == 0
                     else get_option("print.max_rows"))
         max_columns = get_option("print.max_columns")
+        expand_repr = get_option("print.expand_frame_repr")
 
         if max_columns > 0:
             if len(self.index) <= max_rows and \
-                    len(self.columns) <= max_columns:
+                    (len(self.columns) <= max_columns or expand_repr):
                 return False
             else:
                 return True
         else:
             # save us
             if (len(self.index) > max_rows or
                 (com.in_interactive_session() and
-                len(self.columns) > terminal_width // 2)):
+                 len(self.columns) > terminal_width // 2 and
+                 not expand_repr)):
                 return True
             else:
                 buf = StringIO()
                 self.to_string(buf=buf)
                 value = buf.getvalue()
-                if (max([len(l) for l in value.split('\n')]) > terminal_width and
-                    com.in_interactive_session()):
+                if (max([len(l) for l in value.split('\n')]) > terminal_width
+                    and com.in_interactive_session()
+                    and not expand_repr):
                     return True
                 else:
                     return False
@@ -646,13 +649,45 @@ def __unicode__(self):
         if self._need_info_repr_():
             self.info(buf=buf, verbose=self._verbose_info)
         else:
-            self.to_string(buf=buf)
+            is_wide = self._need_wide_repr()
+            line_width = None
+            if is_wide:
+                line_width = get_option('print.line_width')
+            self.to_string(buf=buf, line_width=line_width)
 
         value = buf.getvalue()
         assert type(value) == unicode
 
         return value
 
+    def _need_wide_repr(self):
+        """
+        Return True if __unicode__ should wrap the frame over several
+        line_width-wide blocks; only possible with print.expand_frame_repr.
+        """
+        # Cheapest check first: nothing below can return True without it.
+        if not get_option("print.expand_frame_repr"):
+            return False
+
+        max_columns = get_option("print.max_columns")
+        if max_columns > 0:
+            return len(self.columns) > max_columns
+
+        # max_columns <= 0: decide from the terminal, interactive use only
+        if not com.in_interactive_session():
+            return False
+        if com.in_qtconsole():
+            terminal_width = 100
+        else:
+            terminal_width, _ = get_terminal_size()
+        if len(self.columns) > terminal_width // 2:
+            return True
+
+        # Last resort: render once and measure the longest output line.
+        buf = StringIO()
+        self.to_string(buf=buf)
+        return max(len(l) for l in buf.getvalue().split('\n')) > terminal_width
+
     def __repr__(self):
         """
         Return a string representation for a particular DataFrame
@@ -1454,7 +1489,8 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
     def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
                   header=True, index=True, na_rep='NaN', formatters=None,
                   float_format=None, sparsify=None, nanRep=None,
-                  index_names=True, justify=None, force_unicode=None):
+                  index_names=True, justify=None, force_unicode=None,
+                  line_width=None):
         """
         Render a DataFrame to a console-friendly tabular output.
         """
@@ -1480,7 +1516,8 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
                                            sparsify=sparsify,
                                            justify=justify,
                                            index_names=index_names,
-                                           header=header, index=index)
+                                           header=header, index=index,
+                                           line_width=line_width)
         formatter.to_string()
 
         if buf is None:
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
@@ -400,6 +400,110 @@ def test_frame_info_encoding(self):
         repr(df.T)
         fmt.set_printoptions(max_rows=200)
 
+    def test_wide_repr(self):
+        # expanded repr differs from the default and is shorter at width 120
+        col = lambda l, k: [tm.rands(k) for _ in xrange(l)]
+        df = DataFrame([col(20, 25) for _ in range(10)])
+        set_option('test.interactive', True)
+        try:
+            rep_str = repr(df)
+            set_option('print.expand_frame_repr', True)
+            wide_repr = repr(df)
+            self.assert_(rep_str != wide_repr)
+            set_option('print.line_width', 120)
+            self.assert_(len(repr(df)) < len(wide_repr))
+        finally:  # restore global options even when an assertion fails
+            set_option('print.expand_frame_repr', False)
+            set_option('test.interactive', False)
+            set_option('print.line_width', 80)
+
+    def test_wide_repr_named(self):
+        # the index name must be repeated in every wrapped block
+        col = lambda l, k: [tm.rands(k) for _ in xrange(l)]
+        df = DataFrame([col(20, 25) for _ in range(10)])
+        df.index.name = 'DataFrame Index'
+        set_option('test.interactive', True)
+        try:
+            rep_str = repr(df)
+            set_option('print.expand_frame_repr', True)
+            wide_repr = repr(df)
+            self.assert_(rep_str != wide_repr)
+            set_option('print.line_width', 120)
+            self.assert_(len(repr(df)) < len(wide_repr))
+            # second line of each block; blocks recur every 13 lines
+            for line in wide_repr.splitlines()[1::13]:
+                self.assert_('DataFrame Index' in line)
+        finally:  # restore global options even when an assertion fails
+            set_option('print.expand_frame_repr', False)
+            set_option('test.interactive', False)
+            set_option('print.line_width', 80)
+
+    def test_wide_repr_multiindex(self):
+        # both index level names must be repeated in every wrapped block
+        col = lambda l, k: [tm.rands(k) for _ in xrange(l)]
+        midx = pandas.MultiIndex.from_arrays([np.array(col(10, 5)),
+                                              np.array(col(10, 5))])
+        df = DataFrame([col(20, 25) for _ in range(10)],
+                       index=midx)
+        df.index.names = ['Level 0', 'Level 1']
+        set_option('test.interactive', True)
+        try:
+            rep_str = repr(df)
+            set_option('print.expand_frame_repr', True)
+            wide_repr = repr(df)
+            self.assert_(rep_str != wide_repr)
+            set_option('print.line_width', 120)
+            self.assert_(len(repr(df)) < len(wide_repr))
+            # second line of each block; blocks recur every 13 lines
+            for line in wide_repr.splitlines()[1::13]:
+                self.assert_('Level 0 Level 1' in line)
+        finally:  # restore global options even when an assertion fails
+            set_option('print.expand_frame_repr', False)
+            set_option('test.interactive', False)
+            set_option('print.line_width', 80)
+
+    def test_wide_repr_multiindex_cols(self):
+        # MultiIndex on both axes: check the total line count when wrapped
+        col = lambda l, k: [tm.rands(k) for _ in xrange(l)]
+        midx = pandas.MultiIndex.from_arrays([np.array(col(10, 5)),
+                                              np.array(col(10, 5))])
+        mcols = pandas.MultiIndex.from_arrays([np.array(col(20, 3)),
+                                               np.array(col(20, 3))])
+        df = DataFrame([col(20, 25) for _ in range(10)],
+                       index=midx, columns=mcols)
+        df.index.names = ['Level 0', 'Level 1']
+        set_option('test.interactive', True)
+        try:
+            rep_str = repr(df)
+            set_option('print.expand_frame_repr', True)
+            wide_repr = repr(df)
+            self.assert_(rep_str != wide_repr)
+            set_option('print.line_width', 120)
+            self.assert_(len(repr(df)) < len(wide_repr))
+            # presumably 10 blocks of 13 lines joined by 9 blank lines
+            self.assert_(len(wide_repr.splitlines()) == 14 * 10 - 1)
+        finally:  # restore global options even when an assertion fails
+            set_option('print.expand_frame_repr', False)
+            set_option('test.interactive', False)
+            set_option('print.line_width', 80)
+
+    def test_wide_repr_unicode(self):
+        # same checks as test_wide_repr, with tm.randu cell values
+        col = lambda l, k: [tm.randu(k) for _ in xrange(l)]
+        df = DataFrame([col(20, 25) for _ in range(10)])
+        set_option('test.interactive', True)
+        try:
+            rep_str = repr(df)
+            set_option('print.expand_frame_repr', True)
+            wide_repr = repr(df)
+            self.assert_(rep_str != wide_repr)
+            set_option('print.line_width', 120)
+            self.assert_(len(repr(df)) < len(wide_repr))
+        finally:  # restore global options even when an assertion fails
+            set_option('print.expand_frame_repr', False)
+            set_option('test.interactive', False)
+            set_option('print.line_width', 80)
+
     def test_to_string(self):
         from pandas import read_table
         import re