Skip to content

BUG: show series length in repr when truncated (GH15962) #15974

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,8 @@ Other Enhancements
- :meth:`~MultiIndex.remove_unused_levels` has been added to facilitate :ref:`removing unused levels <advanced.shown_levels>`. (:issue:`15694`)
- ``pd.read_csv()`` will now raise a ``ParserError`` whenever any parsing error occurs (:issue:`15913`, :issue:`15925`)
- ``pd.read_csv()`` now supports the ``error_bad_lines`` and ``warn_bad_lines`` arguments for the Python parser (:issue:`15925`)
- The ``display.show_dimensions`` option can now also be used to specify
whether the length of a ``Series`` should be shown in its repr (:issue:`7117`).
- ``parallel_coordinates()`` has gained a ``sort_labels`` keyword arg that sorts class labels and the colours assigned to them (:issue:`15908`)


Expand Down Expand Up @@ -1560,6 +1562,7 @@ I/O
- Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`)
- Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which caused incorrectly formatted files to be produced for some locales (:issue:`13856`)
- Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`)
- Bug in the ``Series`` repr not showing the length when the output was truncated (:issue:`15962`).

Plotting
^^^^^^^^
Expand Down
40 changes: 12 additions & 28 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,9 +980,10 @@ def __unicode__(self):
width, height = get_terminal_size()
max_rows = (height if get_option("display.max_rows") == 0 else
get_option("display.max_rows"))
show_dimensions = get_option("display.show_dimensions")

self.to_string(buf=buf, name=self.name, dtype=self.dtype,
max_rows=max_rows)
max_rows=max_rows, length=show_dimensions)
result = buf.getvalue()

return result
Expand Down Expand Up @@ -1021,44 +1022,27 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True,
formatted : string (if not buffer passed)
"""

the_repr = self._get_repr(float_format=float_format, na_rep=na_rep,
header=header, index=index, length=length,
dtype=dtype, name=name, max_rows=max_rows)

# catch contract violations
if not isinstance(the_repr, compat.text_type):
raise AssertionError("result must be of type unicode, type"
" of result is {0!r}"
"".format(the_repr.__class__.__name__))

if buf is None:
return the_repr
else:
try:
buf.write(the_repr)
except AttributeError:
with open(buf, 'w') as f:
f.write(the_repr)

def _get_repr(self, name=False, header=True, index=True, length=True,
dtype=True, na_rep='NaN', float_format=None, max_rows=None):
"""

Internal function, should always return unicode string
"""
formatter = fmt.SeriesFormatter(self, name=name, length=length,
header=header, index=index,
dtype=dtype, na_rep=na_rep,
float_format=float_format,
max_rows=max_rows)
result = formatter.to_string()

# TODO: the following check is probably not necessary.
# catch contract violations
if not isinstance(result, compat.text_type):
raise AssertionError("result must be of type unicode, type"
" of result is {0!r}"
"".format(result.__class__.__name__))
return result

if buf is None:
return result
else:
try:
buf.write(result)
except AttributeError:
with open(buf, 'w') as f:
f.write(result)

def __iter__(self):
""" provide iteration over the values of the Series
Expand Down
3 changes: 2 additions & 1 deletion pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,8 @@ def _get_footer(self):
escape_chars=('\t', '\r', '\n'))
footer += ("Name: %s" % series_name) if name is not None else ""

if self.length:
if (self.length is True or
(self.length == 'truncate' and self.truncate_v)):
if footer:
footer += ', '
footer += 'Length: %d' % len(self.series)
Expand Down
33 changes: 27 additions & 6 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1770,12 +1770,14 @@ def test_east_asian_unicode_series(self):
name=u'おおおおおおお')

expected = (u"0 あ\n ... \n"
u"3 ええええ\nName: おおおおおおお, dtype: object")
u"3 ええええ\n"
u"Name: おおおおおおお, Length: 4, dtype: object")
self.assertEqual(_rep(s), expected)

s.index = [u'ああ', u'いいいい', u'う', u'えええ']
expected = (u"ああ あ\n ... \n"
u"えええ ええええ\nName: おおおおおおお, dtype: object")
u"えええ ええええ\n"
u"Name: おおおおおおお, Length: 4, dtype: object")
self.assertEqual(_rep(s), expected)

# Enable Unicode option -----------------------------------------
Expand Down Expand Up @@ -1846,14 +1848,15 @@ def test_east_asian_unicode_series(self):
s = Series([u'あ', u'いい', u'ううう', u'ええええ'],
name=u'おおおおおおお')
expected = (u"0 あ\n ... \n"
u"3 ええええ\nName: おおおおおおお, dtype: object")
u"3 ええええ\n"
u"Name: おおおおおおお, Length: 4, dtype: object")
self.assertEqual(_rep(s), expected)

s.index = [u'ああ', u'いいいい', u'う', u'えええ']
expected = (u"ああ あ\n"
u" ... \n"
u"えええ ええええ\n"
u"Name: おおおおおおお, dtype: object")
u"Name: おおおおおおお, Length: 4, dtype: object")
self.assertEqual(_rep(s), expected)

# ambiguous unicode
Expand Down Expand Up @@ -2021,7 +2024,8 @@ def test_max_multi_index_display(self):
# Make sure #8532 is fixed
def test_consistent_format(self):
s = pd.Series([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.9999, 1, 1] * 10)
with option_context("display.max_rows", 10):
with option_context("display.max_rows", 10,
"display.show_dimensions", False):
res = repr(s)
exp = ('0 1.0000\n1 1.0000\n2 1.0000\n3 '
'1.0000\n4 1.0000\n ... \n125 '
Expand All @@ -2040,7 +2044,8 @@ def chck_ncols(self, s):

def test_format_explicit(self):
test_sers = gen_series_formatting()
with option_context("display.max_rows", 4):
with option_context("display.max_rows", 4,
"display.show_dimensions", False):
res = repr(test_sers['onel'])
exp = '0 a\n1 a\n ..\n98 a\n99 a\ndtype: object'
self.assertEqual(exp, res)
Expand Down Expand Up @@ -2087,6 +2092,22 @@ def getndots(s):
strrepr = repr(s).replace('\n', '')
self.assertEqual(getndots(strrepr), 3)

def test_show_dimensions(self):
# gh-7117
s = Series(range(5))

assert 'Length' not in repr(s)

with option_context("display.max_rows", 4):
assert 'Length' in repr(s)

with option_context("display.show_dimensions", True):
assert 'Length' in repr(s)

with option_context("display.max_rows", 4,
"display.show_dimensions", False):
assert 'Length' not in repr(s)

def test_to_string_name(self):
s = Series(range(100), dtype='int64')
s.name = 'myser'
Expand Down
10 changes: 6 additions & 4 deletions pandas/tests/sparse/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_sparse_max_row(self):
# GH 10560
result = repr(s)
exp = ("0 1.0\n ... \n4 NaN\n"
"dtype: float64\nBlockIndex\n"
"Length: 5, dtype: float64\nBlockIndex\n"
"Block locations: array([0, 3]{0})\n"
"Block lengths: array([1, 1]{0})".format(dfm))
self.assertEqual(result, exp)
Expand All @@ -52,7 +52,8 @@ def test_sparse_mi_max_row(self):
"Block lengths: array([1, 1]{0})".format(dfm))
self.assertEqual(result, exp)

with option_context("display.max_rows", 3):
with option_context("display.max_rows", 3,
"display.show_dimensions", False):
# GH 13144
result = repr(s)
exp = ("A 0 1.0\n ... \nC 2 NaN\n"
Expand All @@ -77,7 +78,7 @@ def test_sparse_bool(self):
with option_context("display.max_rows", 3):
result = repr(s)
exp = ("0 True\n ... \n5 False\n"
"dtype: bool\nBlockIndex\n"
"Length: 6, dtype: bool\nBlockIndex\n"
"Block locations: array([0, 3]{0})\n"
"Block lengths: array([1, 1]{0})".format(dtype))
self.assertEqual(result, exp)
Expand All @@ -94,7 +95,8 @@ def test_sparse_int(self):
"Block lengths: array([1, 1]{0})".format(dtype))
self.assertEqual(result, exp)

with option_context("display.max_rows", 3):
with option_context("display.max_rows", 3,
"display.show_dimensions", False):
result = repr(s)
exp = ("0 0\n ..\n5 0\n"
"dtype: int64\nBlockIndex\n"
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2088,7 +2088,7 @@ def test_repr(self):

a = pd.Series(pd.Categorical(["a", "b"] * 25))
exp = u("0 a\n1 b\n" + " ..\n" + "48 a\n49 b\n" +
"dtype: category\nCategories (2, object): [a, b]")
"Length: 50, dtype: category\nCategories (2, object): [a, b]")
with option_context("display.max_rows", 5):
self.assertEqual(exp, repr(a))

Expand Down