Skip to content

FIX: Fix problems with Series text representation. #9182

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 17, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions doc/source/whatsnew/v0.16.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -567,3 +567,39 @@ Bug Fixes
- Bug in ``Series.value_counts`` when excluding ``NaN`` for categorical type ``Series`` with ``dropna=True`` (:issue:`9443`)
- Fixed missing ``numeric_only`` option for ``DataFrame.std/var/sem`` (:issue:`9201`)
- Support constructing ``Panel`` or ``Panel4D`` with scalar data (:issue:`8285`)
- ``Series`` text representation disconnected from ``max_rows``/``max_columns`` (:issue:`7508`).
- ``Series`` number formatting inconsistent when truncated (:issue:`8532`).

Previous Behavior

.. code-block:: python

In [2]: pd.options.display.max_rows = 10
In [3]: s = pd.Series([1,1,1,1,1,1,1,1,1,1,0.9999,1,1]*10)
In [4]: s
Out[4]:
0 1
1 1
2 1
...
127 0.9999
128 1.0000
129 1.0000
Length: 130, dtype: float64

New Behavior

.. code-block:: python

0 1.0000
1 1.0000
2 1.0000
3 1.0000
4 1.0000
...
125 1.0000
126 1.0000
127 0.9999
128 1.0000
129 1.0000
dtype: float64
87 changes: 57 additions & 30 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,62 +129,80 @@ def to_string(self):

class SeriesFormatter(object):

def __init__(self, series, buf=None, header=True, length=True,
na_rep='NaN', name=False, float_format=None, dtype=True):
def __init__(self, series, buf=None, length=True, header=True,
na_rep='NaN', name=False, float_format=None, dtype=True,
max_rows=None):
self.series = series
self.buf = buf if buf is not None else StringIO()
self.name = name
self.na_rep = na_rep
self.length = length
self.header = header
self.length = length
self.max_rows = max_rows

if float_format is None:
float_format = get_option("display.float_format")
self.float_format = float_format
self.dtype = dtype

self._chk_truncate()

def _chk_truncate(self):
from pandas.tools.merge import concat
max_rows = self.max_rows
truncate_v = max_rows and (len(self.series) > max_rows)
series = self.series
if truncate_v:
if max_rows == 1:
row_num = max_rows
series = series.iloc[:max_rows]
else:
row_num = max_rows // 2
series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
self.tr_row_num = row_num
self.tr_series = series
self.truncate_v = truncate_v

def _get_footer(self):
name = self.series.name
footer = u('')

if self.name:
if getattr(self.series.index, 'freq', None):
footer += 'Freq: %s' % self.series.index.freqstr
if getattr(self.series.index, 'freq', None) is not None:
footer += 'Freq: %s' % self.series.index.freqstr

if footer and self.series.name is not None:
# categories have already a comma + linebreak
if not com.is_categorical_dtype(self.series.dtype):
footer += ', '
if self.name is not False and name is not None:
if footer:
footer += ', '

series_name = com.pprint_thing(self.series.name,
series_name = com.pprint_thing(name,
escape_chars=('\t', '\r', '\n'))
footer += ("Name: %s" %
series_name) if self.series.name is not None else ""
series_name) if name is not None else ""

if self.length:
if footer:
footer += ', '
footer += 'Length: %d' % len(self.series)

# TODO: in tidy_repr, with freq index, no dtype is shown -> also include a guard here?
if self.dtype:
name = getattr(self.series.dtype, 'name', None)
if self.dtype is not False and self.dtype is not None:
name = getattr(self.tr_series.dtype, 'name', None)
if name:
if footer:
footer += ', '
footer += 'dtype: %s' % com.pprint_thing(name)

# level infos are added to the end and in a new line, like it is done for Categoricals
# Only added when we request a name
if self.name and com.is_categorical_dtype(self.series.dtype):
level_info = self.series.values._repr_categories_info()
if name and com.is_categorical_dtype(self.tr_series.dtype):
level_info = self.tr_series.values._repr_categories_info()
if footer:
footer += "\n"
footer += level_info

return compat.text_type(footer)

def _get_formatted_index(self):
index = self.series.index
index = self.tr_series.index
is_multi = isinstance(index, MultiIndex)

if is_multi:
Expand All @@ -196,35 +214,44 @@ def _get_formatted_index(self):
return fmt_index, have_header

def _get_formatted_values(self):
return format_array(self.series.get_values(), None,
return format_array(self.tr_series.get_values(), None,
float_format=self.float_format,
na_rep=self.na_rep)

def to_string(self):
series = self.series
series = self.tr_series
footer = self._get_footer()

if len(series) == 0:
return u('')
return 'Series([], ' + footer + ')'

fmt_index, have_header = self._get_formatted_index()
fmt_values = self._get_formatted_values()

maxlen = max(len(x) for x in fmt_index)
maxlen = max(len(x) for x in fmt_index) # max index len
pad_space = min(maxlen, 60)

result = ['%s %s'] * len(fmt_values)
for i, (k, v) in enumerate(zip(fmt_index[1:], fmt_values)):
idx = k.ljust(pad_space)
result[i] = result[i] % (idx, v)
if self.truncate_v:
n_header_rows = 0
row_num = self.tr_row_num
width = len(fmt_values[row_num-1])
if width > 3:
dot_str = '...'
else:
dot_str = '..'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a good reason for changing the number of dots? If so, please add a test that covers this case.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use this on DataFrames as well. If many columns contain only one- or two-character strings, adding a '...' everywhere wastes space. I thought going down to a single dot could be misleading, which is where the two dots come from.

dot_str = dot_str.center(width)
fmt_values.insert(row_num + n_header_rows, dot_str)
fmt_index.insert(row_num + 1, '')

result = adjoin(3, *[fmt_index[1:], fmt_values])

if self.header and have_header:
result.insert(0, fmt_index[0])
result = fmt_index[0] + '\n' + result

footer = self._get_footer()
if footer:
result.append(footer)
result += '\n' + footer

return compat.text_type(u('\n').join(result))
return compat.text_type(u('').join(result))


def _strlen_func():
Expand Down
66 changes: 23 additions & 43 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from pandas.tseries.period import PeriodIndex, Period
from pandas import compat
from pandas.util.terminal import get_terminal_size
from pandas.compat import zip, u, OrderedDict
from pandas.compat import zip, u, OrderedDict, StringIO

import pandas.core.ops as ops
from pandas.core.algorithms import select_n
Expand Down Expand Up @@ -883,43 +883,16 @@ def __unicode__(self):
Invoked by unicode(df) in py2 only. Yields a Unicode String in both
py2/py3.
"""
buf = StringIO(u(""))
width, height = get_terminal_size()
max_rows = (height if get_option("display.max_rows") == 0
else get_option("display.max_rows"))
if max_rows and len(self.index) > max_rows:
result = self._tidy_repr(min(30, max_rows - 4))
elif len(self.index) > 0:
result = self._get_repr(print_header=True,
length=len(self) > 50,
name=True,
dtype=True)
elif self.name is None:
result = u('Series([], dtype: %s)') % (self.dtype)
else:
result = u('Series([], name: %s, dtype: %s)') % (self.name,
self.dtype)
return result

def _tidy_repr(self, max_vals=20):
"""
self.to_string(buf=buf, name=self.name, dtype=self.dtype,
max_rows=max_rows)
result = buf.getvalue()

Internal function, should always return unicode string
"""
if max_vals > 1:
num = max_vals // 2
else:
num = 1
max_vals = 2
head = self.iloc[:num]._get_repr(print_header=True, length=False,
dtype=False, name=False)
tail = self.iloc[-(max_vals - num):]._get_repr(print_header=False,
length=False,
name=False,
dtype=False)
result = head + '\n...\n' + tail
result = '%s\n%s' % (result, self._repr_footer())

return compat.text_type(result)
return result

def _repr_footer(self):

Expand Down Expand Up @@ -948,8 +921,8 @@ def _repr_footer(self):
len(self),
str(self.dtype.name))

def to_string(self, buf=None, na_rep='NaN', float_format=None,
length=False, dtype=False, name=False):
def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True,
length=False, dtype=False, name=False, max_rows=None):
"""
Render a string representation of the Series

Expand All @@ -962,20 +935,26 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None,
float_format : one-parameter function, optional
formatter function to apply to columns' elements if they are floats
default None
header : boolean, default True
Add the Series header (index name)
length : boolean, default False
Add the Series length
dtype : boolean, default False
Add the Series dtype
name : boolean, default False
Add the Series name (which may be None)
Add the Series name if not None
max_rows : int, optional
Maximum number of rows to show before truncating. If None, show
all.

Returns
-------
formatted : string (if no buffer passed)
"""

the_repr = self._get_repr(float_format=float_format, na_rep=na_rep,
length=length, dtype=dtype, name=name)
header=header, length=length, dtype=dtype,
name=name, max_rows=max_rows)

# catch contract violations
if not isinstance(the_repr, compat.text_type):
Expand All @@ -993,17 +972,18 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None,
f.write(the_repr)

def _get_repr(
self, name=False, print_header=False, length=True, dtype=True,
na_rep='NaN', float_format=None):
self, name=False, header=True, length=True, dtype=True, na_rep='NaN',
float_format=None, max_rows=None):
"""

Internal function, should always return unicode string
"""

formatter = fmt.SeriesFormatter(self, name=name, header=print_header,
length=length, dtype=dtype,
formatter = fmt.SeriesFormatter(self, name=name,
length=length, header=header,
dtype=dtype,
na_rep=na_rep,
float_format=float_format)
float_format=float_format,
max_rows=max_rows)
result = formatter.to_string()

# TODO: following check prob. not neces.
Expand Down
14 changes: 7 additions & 7 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from pandas import Categorical, Index, Series, DataFrame, PeriodIndex, Timestamp

from pandas.core.config import option_context
import pandas.core.common as com
import pandas.compat as compat
import pandas.util.testing as tm
Expand Down Expand Up @@ -1559,12 +1560,12 @@ def test_repr(self):

self.assertEqual(exp, a.__unicode__())

a = pd.Series(pd.Categorical(["a","b"] *25, name="a", ordered=True))
exp = u("".join(["%s a\n%s b\n"%(i,i+1) for i in range(0,10,2)]) + "...\n" +
"".join(["%s a\n%s b\n"%(i,i+1) for i in range(40,50,2)]) +
"Name: a, Length: 50, dtype: category\n" +
"Categories (2, object): [a < b]")
self.assertEqual(exp,a._tidy_repr())
a = pd.Series(pd.Categorical(["a","b"] *25, name="a"))
exp = u("0 a\n1 b\n" + " ..\n" +
"48 a\n49 b\n" +
"Name: a, dtype: category\nCategories (2, object): [a, b]")
with option_context("display.max_rows", 5):
self.assertEqual(exp, repr(a))

levs = list("abcdefghijklmnopqrstuvwxyz")
a = pd.Series(pd.Categorical(["a","b"], name="a", categories=levs, ordered=True))
Expand All @@ -1573,7 +1574,6 @@ def test_repr(self):
"Categories (26, object): [a < b < c < d ... w < x < y < z]")
self.assertEqual(exp,a.__unicode__())


def test_info(self):

# make sure it works
Expand Down
Loading