Skip to content

Commit 7d715fa

Browse files
committed
FIX: Series text representation
1 parent 8f422e2 commit 7d715fa

File tree

6 files changed

+256
-86
lines changed

6 files changed

+256
-86
lines changed

doc/source/whatsnew/v0.16.0.txt

+36
Original file line numberDiff line numberDiff line change
@@ -567,3 +567,39 @@ Bug Fixes
567567
- Bug in ``Series.value_counts`` with excluding ``NaN`` for categorical type ``Series`` with ``dropna=True`` (:issue:`9443`)
568568
- Fixed missing numeric_only option for ``DataFrame.std/var/sem`` (:issue:`9201`)
569569
- Support constructing ``Panel`` or ``Panel4D`` with scalar data (:issue:`8285`)
570+
- ``Series`` text representation disconnected from ``max_rows``/``max_columns`` (:issue:`7508`).
571+
- ``Series`` number formatting inconsistent when truncated (:issue:`8532`).
572+
573+
Previous Behavior
574+
575+
.. code-block:: python
576+
577+
In [2]: pd.options.display.max_rows = 10
578+
In [3]: s = pd.Series([1,1,1,1,1,1,1,1,1,1,0.9999,1,1]*10)
579+
In [4]: s
580+
Out[4]:
581+
0 1
582+
1 1
583+
2 1
584+
...
585+
127 0.9999
586+
128 1.0000
587+
129 1.0000
588+
Length: 130, dtype: float64
589+
590+
New Behavior
591+
592+
.. code-block:: python
593+
594+
0 1.0000
595+
1 1.0000
596+
2 1.0000
597+
3 1.0000
598+
4 1.0000
599+
...
600+
125 1.0000
601+
126 1.0000
602+
127 0.9999
603+
128 1.0000
604+
129 1.0000
605+
dtype: float64

pandas/core/format.py

+57-30
Original file line numberDiff line numberDiff line change
@@ -129,62 +129,80 @@ def to_string(self):
129129

130130
class SeriesFormatter(object):
131131

132-
def __init__(self, series, buf=None, header=True, length=True,
133-
na_rep='NaN', name=False, float_format=None, dtype=True):
132+
def __init__(self, series, buf=None, length=True, header=True,
133+
na_rep='NaN', name=False, float_format=None, dtype=True,
134+
max_rows=None):
134135
self.series = series
135136
self.buf = buf if buf is not None else StringIO()
136137
self.name = name
137138
self.na_rep = na_rep
138-
self.length = length
139139
self.header = header
140+
self.length = length
141+
self.max_rows = max_rows
140142

141143
if float_format is None:
142144
float_format = get_option("display.float_format")
143145
self.float_format = float_format
144146
self.dtype = dtype
145147

148+
self._chk_truncate()
149+
150+
def _chk_truncate(self):
151+
from pandas.tools.merge import concat
152+
max_rows = self.max_rows
153+
truncate_v = max_rows and (len(self.series) > max_rows)
154+
series = self.series
155+
if truncate_v:
156+
if max_rows == 1:
157+
row_num = max_rows
158+
series = series.iloc[:max_rows]
159+
else:
160+
row_num = max_rows // 2
161+
series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
162+
self.tr_row_num = row_num
163+
self.tr_series = series
164+
self.truncate_v = truncate_v
165+
146166
def _get_footer(self):
167+
name = self.series.name
147168
footer = u('')
148169

149-
if self.name:
150-
if getattr(self.series.index, 'freq', None):
151-
footer += 'Freq: %s' % self.series.index.freqstr
170+
if getattr(self.series.index, 'freq', None) is not None:
171+
footer += 'Freq: %s' % self.series.index.freqstr
152172

153-
if footer and self.series.name is not None:
154-
# categories have already a comma + linebreak
155-
if not com.is_categorical_dtype(self.series.dtype):
156-
footer += ', '
173+
if self.name is not False and name is not None:
174+
if footer:
175+
footer += ', '
157176

158-
series_name = com.pprint_thing(self.series.name,
177+
series_name = com.pprint_thing(name,
159178
escape_chars=('\t', '\r', '\n'))
160179
footer += ("Name: %s" %
161-
series_name) if self.series.name is not None else ""
180+
series_name) if name is not None else ""
162181

163182
if self.length:
164183
if footer:
165184
footer += ', '
166185
footer += 'Length: %d' % len(self.series)
167186

168-
# TODO: in tidy_repr, with freq index, no dtype is shown -> also include a guard here?
169-
if self.dtype:
170-
name = getattr(self.series.dtype, 'name', None)
187+
if self.dtype is not False and self.dtype is not None:
188+
name = getattr(self.tr_series.dtype, 'name', None)
171189
if name:
172190
if footer:
173191
footer += ', '
174192
footer += 'dtype: %s' % com.pprint_thing(name)
175193

176194
# level infos are added to the end and in a new line, like it is done for Categoricals
177195
# Only added when we request a name
178-
if self.name and com.is_categorical_dtype(self.series.dtype):
179-
level_info = self.series.values._repr_categories_info()
196+
if name and com.is_categorical_dtype(self.tr_series.dtype):
197+
level_info = self.tr_series.values._repr_categories_info()
180198
if footer:
181199
footer += "\n"
182200
footer += level_info
183201

184202
return compat.text_type(footer)
185203

186204
def _get_formatted_index(self):
187-
index = self.series.index
205+
index = self.tr_series.index
188206
is_multi = isinstance(index, MultiIndex)
189207

190208
if is_multi:
@@ -196,35 +214,44 @@ def _get_formatted_index(self):
196214
return fmt_index, have_header
197215

198216
def _get_formatted_values(self):
199-
return format_array(self.series.get_values(), None,
217+
return format_array(self.tr_series.get_values(), None,
200218
float_format=self.float_format,
201219
na_rep=self.na_rep)
202220

203221
def to_string(self):
204-
series = self.series
222+
series = self.tr_series
223+
footer = self._get_footer()
205224

206225
if len(series) == 0:
207-
return u('')
226+
return 'Series([], ' + footer + ')'
208227

209228
fmt_index, have_header = self._get_formatted_index()
210229
fmt_values = self._get_formatted_values()
211230

212-
maxlen = max(len(x) for x in fmt_index)
231+
maxlen = max(len(x) for x in fmt_index) # max index len
213232
pad_space = min(maxlen, 60)
214233

215-
result = ['%s %s'] * len(fmt_values)
216-
for i, (k, v) in enumerate(zip(fmt_index[1:], fmt_values)):
217-
idx = k.ljust(pad_space)
218-
result[i] = result[i] % (idx, v)
234+
if self.truncate_v:
235+
n_header_rows = 0
236+
row_num = self.tr_row_num
237+
width = len(fmt_values[row_num-1])
238+
if width > 3:
239+
dot_str = '...'
240+
else:
241+
dot_str = '..'
242+
dot_str = dot_str.center(width)
243+
fmt_values.insert(row_num + n_header_rows, dot_str)
244+
fmt_index.insert(row_num + 1, '')
245+
246+
result = adjoin(3, *[fmt_index[1:], fmt_values])
219247

220248
if self.header and have_header:
221-
result.insert(0, fmt_index[0])
249+
result = fmt_index[0] + '\n' + result
222250

223-
footer = self._get_footer()
224251
if footer:
225-
result.append(footer)
252+
result += '\n' + footer
226253

227-
return compat.text_type(u('\n').join(result))
254+
return compat.text_type(u('').join(result))
228255

229256

230257
def _strlen_func():

pandas/core/series.py

+23-43
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
from pandas.tseries.period import PeriodIndex, Period
3737
from pandas import compat
3838
from pandas.util.terminal import get_terminal_size
39-
from pandas.compat import zip, u, OrderedDict
39+
from pandas.compat import zip, u, OrderedDict, StringIO
4040

4141
import pandas.core.ops as ops
4242
from pandas.core.algorithms import select_n
@@ -883,43 +883,16 @@ def __unicode__(self):
883883
Invoked by unicode(df) in py2 only. Yields a Unicode String in both
884884
py2/py3.
885885
"""
886+
buf = StringIO(u(""))
886887
width, height = get_terminal_size()
887888
max_rows = (height if get_option("display.max_rows") == 0
888889
else get_option("display.max_rows"))
889-
if max_rows and len(self.index) > max_rows:
890-
result = self._tidy_repr(min(30, max_rows - 4))
891-
elif len(self.index) > 0:
892-
result = self._get_repr(print_header=True,
893-
length=len(self) > 50,
894-
name=True,
895-
dtype=True)
896-
elif self.name is None:
897-
result = u('Series([], dtype: %s)') % (self.dtype)
898-
else:
899-
result = u('Series([], name: %s, dtype: %s)') % (self.name,
900-
self.dtype)
901-
return result
902890

903-
def _tidy_repr(self, max_vals=20):
904-
"""
891+
self.to_string(buf=buf, name=self.name, dtype=self.dtype,
892+
max_rows=max_rows)
893+
result = buf.getvalue()
905894

906-
Internal function, should always return unicode string
907-
"""
908-
if max_vals > 1:
909-
num = max_vals // 2
910-
else:
911-
num = 1
912-
max_vals = 2
913-
head = self.iloc[:num]._get_repr(print_header=True, length=False,
914-
dtype=False, name=False)
915-
tail = self.iloc[-(max_vals - num):]._get_repr(print_header=False,
916-
length=False,
917-
name=False,
918-
dtype=False)
919-
result = head + '\n...\n' + tail
920-
result = '%s\n%s' % (result, self._repr_footer())
921-
922-
return compat.text_type(result)
895+
return result
923896

924897
def _repr_footer(self):
925898

@@ -948,8 +921,8 @@ def _repr_footer(self):
948921
len(self),
949922
str(self.dtype.name))
950923

951-
def to_string(self, buf=None, na_rep='NaN', float_format=None,
952-
length=False, dtype=False, name=False):
924+
def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True,
925+
length=False, dtype=False, name=False, max_rows=None):
953926
"""
954927
Render a string representation of the Series
955928
@@ -962,20 +935,26 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None,
962935
float_format : one-parameter function, optional
963936
formatter function to apply to columns' elements if they are floats
964937
default None
938+
header : boolean, default True
939+
Add the Series header (index name)
965940
length : boolean, default False
966941
Add the Series length
967942
dtype : boolean, default False
968943
Add the Series dtype
969944
name : boolean, default False
970-
Add the Series name (which may be None)
945+
Add the Series name if not None
946+
max_rows : int, optional
947+
Maximum number of rows to show before truncating. If None, show
948+
all.
971949
972950
Returns
973951
-------
974952
formatted : string (if not buffer passed)
975953
"""
976954

977955
the_repr = self._get_repr(float_format=float_format, na_rep=na_rep,
978-
length=length, dtype=dtype, name=name)
956+
header=header, length=length, dtype=dtype,
957+
name=name, max_rows=max_rows)
979958

980959
# catch contract violations
981960
if not isinstance(the_repr, compat.text_type):
@@ -993,17 +972,18 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None,
993972
f.write(the_repr)
994973

995974
def _get_repr(
996-
self, name=False, print_header=False, length=True, dtype=True,
997-
na_rep='NaN', float_format=None):
975+
self, name=False, header=True, length=True, dtype=True, na_rep='NaN',
976+
float_format=None, max_rows=None):
998977
"""
999978
1000979
Internal function, should always return unicode string
1001980
"""
1002-
1003-
formatter = fmt.SeriesFormatter(self, name=name, header=print_header,
1004-
length=length, dtype=dtype,
981+
formatter = fmt.SeriesFormatter(self, name=name,
982+
length=length, header=header,
983+
dtype=dtype,
1005984
na_rep=na_rep,
1006-
float_format=float_format)
985+
float_format=float_format,
986+
max_rows=max_rows)
1007987
result = formatter.to_string()
1008988

1009989
# TODO: following check prob. not neces.

pandas/tests/test_categorical.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from pandas import Categorical, Index, Series, DataFrame, PeriodIndex, Timestamp
1515

16+
from pandas.core.config import option_context
1617
import pandas.core.common as com
1718
import pandas.compat as compat
1819
import pandas.util.testing as tm
@@ -1559,12 +1560,12 @@ def test_repr(self):
15591560

15601561
self.assertEqual(exp, a.__unicode__())
15611562

1562-
a = pd.Series(pd.Categorical(["a","b"] *25, name="a", ordered=True))
1563-
exp = u("".join(["%s a\n%s b\n"%(i,i+1) for i in range(0,10,2)]) + "...\n" +
1564-
"".join(["%s a\n%s b\n"%(i,i+1) for i in range(40,50,2)]) +
1565-
"Name: a, Length: 50, dtype: category\n" +
1566-
"Categories (2, object): [a < b]")
1567-
self.assertEqual(exp,a._tidy_repr())
1563+
a = pd.Series(pd.Categorical(["a","b"] *25, name="a"))
1564+
exp = u("0 a\n1 b\n" + " ..\n" +
1565+
"48 a\n49 b\n" +
1566+
"Name: a, dtype: category\nCategories (2, object): [a, b]")
1567+
with option_context("display.max_rows", 5):
1568+
self.assertEqual(exp, repr(a))
15681569

15691570
levs = list("abcdefghijklmnopqrstuvwxyz")
15701571
a = pd.Series(pd.Categorical(["a","b"], name="a", categories=levs, ordered=True))
@@ -1573,7 +1574,6 @@ def test_repr(self):
15731574
"Categories (26, object): [a < b < c < d ... w < x < y < z]")
15741575
self.assertEqual(exp,a.__unicode__())
15751576

1576-
15771577
def test_info(self):
15781578

15791579
# make sure it works

0 commit comments

Comments
 (0)