Skip to content

repr for wider DataFrames #2436

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 7, 2012
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1173,7 +1173,7 @@ def in_interactive_session():
returns True if running under python/ipython interactive shell
"""
import __main__ as main
return not hasattr(main, '__file__')
return not hasattr(main, '__file__') or get_option('test.interactive')

def in_qtconsole():
"""
Expand Down
24 changes: 24 additions & 0 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,20 @@
these are generally strings meant to be displayed on the console.
"""

# Help text registered with the 'expand_frame_repr' display option.
pc_expand_repr_doc="""
: boolean
Default False
Whether to print out the full DataFrame repr for wide DataFrames
across multiple lines.
If False, the summary representation is shown.
"""

# Help text registered with the 'line_width' display option.
pc_line_width_doc="""
: int
Default 80
When printing wide DataFrames, this is the width of each line.
"""

with cf.config_prefix('print'):
cf.register_option('precision', 7, pc_precision_doc, validator=is_int)
cf.register_option('digits', 7, validator=is_int)
Expand All @@ -99,3 +113,13 @@
validator=is_bool)
cf.register_option('encoding', detect_console_encoding(), pc_encoding_doc,
validator=is_text)
# Validate values on assignment, consistent with the other options
# registered in this module (which pass is_int / is_bool / is_text).
cf.register_option('expand_frame_repr', False, pc_expand_repr_doc,
                   validator=is_bool)
cf.register_option('line_width', 80, pc_line_width_doc,
                   validator=is_int)

# Help text for the 'test.interactive' option, which lets the test suite
# force in_interactive_session() to report an interactive shell.
tc_interactive_doc="""
: boolean
Default False
Whether to simulate interactive mode for purposes of testing
"""
with cf.config_prefix('test'):
    # Reject non-boolean values, as the other boolean options do.
    cf.register_option('interactive', False, tc_interactive_doc,
                       validator=is_bool)
42 changes: 40 additions & 2 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ class DataFrameFormatter(object):
def __init__(self, frame, buf=None, columns=None, col_space=None,
header=True, index=True, na_rep='NaN', formatters=None,
justify=None, float_format=None, sparsify=None,
index_names=True, **kwds):
index_names=True, line_width=None, **kwds):
self.frame = frame
self.buf = buf if buf is not None else StringIO()
self.show_index_names = index_names
Expand All @@ -202,6 +202,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
self.col_space = col_space
self.header = header
self.index = index
self.line_width = line_width

if justify is None:
self.justify = get_option("print.colheader_justify")
Expand Down Expand Up @@ -282,10 +283,36 @@ def to_string(self, force_unicode=None):
text = info_line
else:
strcols = self._to_str_columns()
text = adjoin(1, *strcols)
if self.line_width is None:
text = adjoin(1, *strcols)
else:
text = self._join_multiline(*strcols)

self.buf.writelines(text)

def _join_multiline(self, *strcols):
    """
    Join rendered columns into stacked chunks limited by ``self.line_width``.

    Parameters
    ----------
    *strcols : lists of str
        One list of rendered strings per column. When ``self.index`` is
        true, the first list is the rendered index; it is removed from the
        data columns and repeated at the start of every chunk.

    Returns
    -------
    str
        The chunks, each produced by ``adjoin``, separated by a blank line.
        Every chunk except the last ends its first line with a `` \\``
        continuation marker.
    """
    lwidth = self.line_width
    strcols = list(strcols)

    # The index is only present among strcols when it is being printed;
    # keep ``idx`` bound either way so index=False does not raise NameError.
    idx = None
    if self.index:
        idx = strcols.pop(0)
        lwidth -= max([len(x) for x in idx])

    col_widths = [max([len(x) for x in col]) if len(col) > 0 else 0
                  for col in strcols]
    col_bins = _binify(col_widths, lwidth)

    n_cols = len(strcols)
    chunks = []
    start = 0
    for end in col_bins:
        row = strcols[start:end]
        if idx is not None:
            row.insert(0, idx)
        # Only mark a continuation when more columns follow this chunk.
        if end < n_cols:
            row.append([' \\'] + [' '] * (len(self.frame) - 1))
        chunks.append(adjoin(1, *row))
        start = end
    return '\n\n'.join(chunks)

def to_latex(self, force_unicode=None, column_format=None):
"""
Render a DataFrame to a LaTeX tabular environment output.
Expand Down Expand Up @@ -1376,6 +1403,17 @@ def _put_lines(buf, lines):
lines = [unicode(x) for x in lines]
buf.write('\n'.join(lines))

def _binify(cols, width):
bins = []
curr_width = 0
for i, w in enumerate(cols):
curr_width += w
if curr_width + 2 > width:
bins.append(i)
curr_width = w
elif i + 1== len(cols):
bins.append(i + 1)
return bins

if __name__ == '__main__':
arr = np.array([746.03, 0.00, 5620.00, 1592.36])
Expand Down
51 changes: 44 additions & 7 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,25 +592,28 @@ def _need_info_repr_(self):
max_rows = (terminal_height if get_option("print.max_rows") == 0
else get_option("print.max_rows"))
max_columns = get_option("print.max_columns")
expand_repr = get_option("print.expand_frame_repr")

if max_columns > 0:
if len(self.index) <= max_rows and \
len(self.columns) <= max_columns:
(len(self.columns) <= max_columns or expand_repr):
return False
else:
return True
else:
# save us
if (len(self.index) > max_rows or
(com.in_interactive_session() and
len(self.columns) > terminal_width // 2)):
len(self.columns) > terminal_width // 2 and
not expand_repr)):
return True
else:
buf = StringIO()
self.to_string(buf=buf)
value = buf.getvalue()
if (max([len(l) for l in value.split('\n')]) > terminal_width and
com.in_interactive_session()):
if (max([len(l) for l in value.split('\n')]) > terminal_width
and com.in_interactive_session()
and not expand_repr):
return True
else:
return False
Expand Down Expand Up @@ -646,13 +649,45 @@ def __unicode__(self):
if self._need_info_repr_():
self.info(buf=buf, verbose=self._verbose_info)
else:
self.to_string(buf=buf)
is_wide = self._need_wide_repr()
line_width = None
if is_wide:
line_width = get_option('print.line_width')
self.to_string(buf=buf, line_width=line_width)

value = buf.getvalue()
assert type(value) == unicode

return value

def _need_wide_repr(self):
    """
    Decide whether the repr should be wrapped across multiple blocks.

    Returns True only when the ``print.expand_frame_repr`` option is set
    and the frame is considered too wide:

    * with ``print.max_columns`` > 0: more columns than ``max_columns``;
    * otherwise, in an interactive session only: more columns than half
      the terminal width, or a rendered line longer than the terminal
      width (the qtconsole is treated as 100 columns wide).
    """
    # Wrapping is opt-in; every True outcome requires this option, so bail
    # out before doing any (potentially expensive) rendering.
    if not get_option("print.expand_frame_repr"):
        return False

    max_columns = get_option("print.max_columns")
    if max_columns > 0:
        return len(self.columns) > max_columns

    # Terminal-based heuristics only apply to interactive sessions.
    if not com.in_interactive_session():
        return False

    if com.in_qtconsole():
        terminal_width = 100
    else:
        terminal_width, _ = get_terminal_size()

    # save us: skip rendering when the column count alone is too large
    if len(self.columns) > terminal_width // 2:
        return True

    buf = StringIO()
    self.to_string(buf=buf)
    value = buf.getvalue()
    return max([len(l) for l in value.split('\n')]) > terminal_width

def __repr__(self):
"""
Return a string representation for a particular DataFrame
Expand Down Expand Up @@ -1450,7 +1485,8 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
header=True, index=True, na_rep='NaN', formatters=None,
float_format=None, sparsify=None, nanRep=None,
index_names=True, justify=None, force_unicode=None):
index_names=True, justify=None, force_unicode=None,
line_width=None):
"""
Render a DataFrame to a console-friendly tabular output.
"""
Expand All @@ -1476,7 +1512,8 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
sparsify=sparsify,
justify=justify,
index_names=index_names,
header=header, index=index)
header=header, index=index,
line_width=line_width)
formatter.to_string()

if buf is None:
Expand Down
104 changes: 104 additions & 0 deletions pandas/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,110 @@ def test_frame_info_encoding(self):
repr(df.T)
fmt.set_printoptions(max_rows=200)

def test_wide_repr(self):
    """Expanded repr differs from the default and shrinks with wider lines."""
    col = lambda l, k: [tm.rands(k) for _ in xrange(l)]
    try:
        set_option('test.interactive', True)
        df = DataFrame([col(20, 25) for _ in range(10)])
        rep_str = repr(df)
        set_option('print.expand_frame_repr', True)
        wide_repr = repr(df)
        self.assert_(rep_str != wide_repr)

        set_option('print.line_width', 120)
        wider_repr = repr(df)
        self.assert_(len(wider_repr) < len(wide_repr))
    finally:
        # Restore the defaults even if an assertion fails, so the global
        # options do not leak into other tests.
        set_option('print.expand_frame_repr', False)
        set_option('test.interactive', False)
        set_option('print.line_width', 80)

def test_wide_repr_named(self):
    """Expanded repr repeats the index name at the top of every block."""
    col = lambda l, k: [tm.rands(k) for _ in xrange(l)]
    try:
        set_option('test.interactive', True)
        df = DataFrame([col(20, 25) for _ in range(10)])
        df.index.name = 'DataFrame Index'
        rep_str = repr(df)
        set_option('print.expand_frame_repr', True)
        wide_repr = repr(df)
        self.assert_(rep_str != wide_repr)

        set_option('print.line_width', 120)
        wider_repr = repr(df)
        self.assert_(len(wider_repr) < len(wide_repr))

        # Every 13th line starting from the second is expected to be the
        # index-name row of a block.
        for line in wide_repr.splitlines()[1::13]:
            self.assert_('DataFrame Index' in line)
    finally:
        # Restore the defaults even if an assertion fails, so the global
        # options do not leak into other tests.
        set_option('print.expand_frame_repr', False)
        set_option('test.interactive', False)
        set_option('print.line_width', 80)

def test_wide_repr_multiindex(self):
    """Expanded repr repeats MultiIndex level names in every block."""
    col = lambda l, k: [tm.rands(k) for _ in xrange(l)]
    try:
        set_option('test.interactive', True)
        midx = pandas.MultiIndex.from_arrays([np.array(col(10, 5)),
                                              np.array(col(10, 5))])
        df = DataFrame([col(20, 25) for _ in range(10)],
                       index=midx)
        df.index.names = ['Level 0', 'Level 1']
        rep_str = repr(df)
        set_option('print.expand_frame_repr', True)
        wide_repr = repr(df)
        self.assert_(rep_str != wide_repr)

        set_option('print.line_width', 120)
        wider_repr = repr(df)
        self.assert_(len(wider_repr) < len(wide_repr))

        # Every 13th line starting from the second is expected to be the
        # index-names row of a block.
        for line in wide_repr.splitlines()[1::13]:
            self.assert_('Level 0 Level 1' in line)
    finally:
        # Restore the defaults even if an assertion fails, so the global
        # options do not leak into other tests.
        set_option('print.expand_frame_repr', False)
        set_option('test.interactive', False)
        set_option('print.line_width', 80)

def test_wide_repr_multiindex_cols(self):
    """Expanded repr of a frame with MultiIndex rows and columns."""
    col = lambda l, k: [tm.rands(k) for _ in xrange(l)]
    try:
        set_option('test.interactive', True)
        midx = pandas.MultiIndex.from_arrays([np.array(col(10, 5)),
                                              np.array(col(10, 5))])
        mcols = pandas.MultiIndex.from_arrays([np.array(col(20, 3)),
                                               np.array(col(20, 3))])
        df = DataFrame([col(20, 25) for _ in range(10)],
                       index=midx, columns=mcols)
        df.index.names = ['Level 0', 'Level 1']
        rep_str = repr(df)
        set_option('print.expand_frame_repr', True)
        wide_repr = repr(df)
        self.assert_(rep_str != wide_repr)

        set_option('print.line_width', 120)
        wider_repr = repr(df)
        self.assert_(len(wider_repr) < len(wide_repr))

        self.assert_(len(wide_repr.splitlines()) == 14 * 10 - 1)
    finally:
        # Restore the defaults even if an assertion fails, so the global
        # options do not leak into other tests.
        set_option('print.expand_frame_repr', False)
        set_option('test.interactive', False)
        set_option('print.line_width', 80)

def test_wide_repr_unicode(self):
    """Expanded repr also works for frames built from ``tm.randu`` values."""
    col = lambda l, k: [tm.randu(k) for _ in xrange(l)]
    try:
        set_option('test.interactive', True)
        df = DataFrame([col(20, 25) for _ in range(10)])
        rep_str = repr(df)
        set_option('print.expand_frame_repr', True)
        wide_repr = repr(df)
        self.assert_(rep_str != wide_repr)

        set_option('print.line_width', 120)
        wider_repr = repr(df)
        self.assert_(len(wider_repr) < len(wide_repr))
    finally:
        # Restore the defaults even if an assertion fails, so the global
        # options do not leak into other tests.
        set_option('print.expand_frame_repr', False)
        set_option('test.interactive', False)
        set_option('print.line_width', 80)

def test_to_string(self):
from pandas import read_table
import re
Expand Down