diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index 0eeee8ccfddf6..e61b7d18672f7 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -567,3 +567,39 @@ Bug Fixes - Bug in ``Series.values_counts`` with excluding ``NaN`` for categorical type ``Series`` with ``dropna=True`` (:issue:`9443`) - Fixed mising numeric_only option for ``DataFrame.std/var/sem`` (:issue:`9201`) - Support constructing ``Panel`` or ``Panel4D`` with scalar data (:issue:`8285`) +- ``Series`` text representation disconnected from ``max_rows``/``max_columns`` (:issue:`7508`). +- ``Series`` number formatting inconsistent when truncated (:issue:`8532`). + + Previous Behavior + + .. code-block:: python + + In [2]: pd.options.display.max_rows = 10 + In [3]: s = pd.Series([1,1,1,1,1,1,1,1,1,1,0.9999,1,1]*10) + In [4]: s + Out[4]: + 0 1 + 1 1 + 2 1 + ... + 127 0.9999 + 128 1.0000 + 129 1.0000 + Length: 130, dtype: float64 + + New Behavior + + .. code-block:: python + + 0 1.0000 + 1 1.0000 + 2 1.0000 + 3 1.0000 + 4 1.0000 + ... 
+ 125 1.0000 + 126 1.0000 + 127 0.9999 + 128 1.0000 + 129 1.0000 + dtype: float64 diff --git a/pandas/core/format.py b/pandas/core/format.py index 3efcfec254591..b21ca9050ffd0 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -129,45 +129,63 @@ def to_string(self): class SeriesFormatter(object): - def __init__(self, series, buf=None, header=True, length=True, - na_rep='NaN', name=False, float_format=None, dtype=True): + def __init__(self, series, buf=None, length=True, header=True, + na_rep='NaN', name=False, float_format=None, dtype=True, + max_rows=None): self.series = series self.buf = buf if buf is not None else StringIO() self.name = name self.na_rep = na_rep - self.length = length self.header = header + self.length = length + self.max_rows = max_rows if float_format is None: float_format = get_option("display.float_format") self.float_format = float_format self.dtype = dtype + self._chk_truncate() + + def _chk_truncate(self): + from pandas.tools.merge import concat + max_rows = self.max_rows + truncate_v = max_rows and (len(self.series) > max_rows) + series = self.series + if truncate_v: + if max_rows == 1: + row_num = max_rows + series = series.iloc[:max_rows] + else: + row_num = max_rows // 2 + series = concat((series.iloc[:row_num], series.iloc[-row_num:])) + self.tr_row_num = row_num + self.tr_series = series + self.truncate_v = truncate_v + def _get_footer(self): + name = self.series.name footer = u('') - if self.name: - if getattr(self.series.index, 'freq', None): - footer += 'Freq: %s' % self.series.index.freqstr + if getattr(self.series.index, 'freq', None) is not None: + footer += 'Freq: %s' % self.series.index.freqstr - if footer and self.series.name is not None: - # categories have already a comma + linebreak - if not com.is_categorical_dtype(self.series.dtype): - footer += ', ' + if self.name is not False and name is not None: + if footer: + footer += ', ' - series_name = com.pprint_thing(self.series.name, + series_name = 
com.pprint_thing(name, escape_chars=('\t', '\r', '\n')) footer += ("Name: %s" % - series_name) if self.series.name is not None else "" + series_name) if name is not None else "" if self.length: if footer: footer += ', ' footer += 'Length: %d' % len(self.series) - # TODO: in tidy_repr, with freq index, no dtype is shown -> also include a guard here? - if self.dtype: - name = getattr(self.series.dtype, 'name', None) + if self.dtype is not False and self.dtype is not None: + name = getattr(self.tr_series.dtype, 'name', None) if name: if footer: footer += ', ' @@ -175,8 +193,8 @@ def _get_footer(self): # level infos are added to the end and in a new line, like it is done for Categoricals # Only added when we request a name - if self.name and com.is_categorical_dtype(self.series.dtype): - level_info = self.series.values._repr_categories_info() + if name and com.is_categorical_dtype(self.tr_series.dtype): + level_info = self.tr_series.values._repr_categories_info() if footer: footer += "\n" footer += level_info @@ -184,7 +202,7 @@ def _get_footer(self): return compat.text_type(footer) def _get_formatted_index(self): - index = self.series.index + index = self.tr_series.index is_multi = isinstance(index, MultiIndex) if is_multi: @@ -196,35 +214,44 @@ def _get_formatted_index(self): return fmt_index, have_header def _get_formatted_values(self): - return format_array(self.series.get_values(), None, + return format_array(self.tr_series.get_values(), None, float_format=self.float_format, na_rep=self.na_rep) def to_string(self): - series = self.series + series = self.tr_series + footer = self._get_footer() if len(series) == 0: - return u('') + return 'Series([], ' + footer + ')' fmt_index, have_header = self._get_formatted_index() fmt_values = self._get_formatted_values() - maxlen = max(len(x) for x in fmt_index) + maxlen = max(len(x) for x in fmt_index) # max index len pad_space = min(maxlen, 60) - result = ['%s %s'] * len(fmt_values) - for i, (k, v) in 
enumerate(zip(fmt_index[1:], fmt_values)): - idx = k.ljust(pad_space) - result[i] = result[i] % (idx, v) + if self.truncate_v: + n_header_rows = 0 + row_num = self.tr_row_num + width = len(fmt_values[row_num-1]) + if width > 3: + dot_str = '...' + else: + dot_str = '..' + dot_str = dot_str.center(width) + fmt_values.insert(row_num + n_header_rows, dot_str) + fmt_index.insert(row_num + 1, '') + + result = adjoin(3, *[fmt_index[1:], fmt_values]) if self.header and have_header: - result.insert(0, fmt_index[0]) + result = fmt_index[0] + '\n' + result - footer = self._get_footer() if footer: - result.append(footer) + result += '\n' + footer - return compat.text_type(u('\n').join(result)) + return compat.text_type(u('').join(result)) def _strlen_func(): diff --git a/pandas/core/series.py b/pandas/core/series.py index d34657f0dc256..7e3b21be13525 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -36,7 +36,7 @@ from pandas.tseries.period import PeriodIndex, Period from pandas import compat from pandas.util.terminal import get_terminal_size -from pandas.compat import zip, u, OrderedDict +from pandas.compat import zip, u, OrderedDict, StringIO import pandas.core.ops as ops from pandas.core.algorithms import select_n @@ -883,43 +883,16 @@ def __unicode__(self): Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. 
""" + buf = StringIO(u("")) width, height = get_terminal_size() max_rows = (height if get_option("display.max_rows") == 0 else get_option("display.max_rows")) - if max_rows and len(self.index) > max_rows: - result = self._tidy_repr(min(30, max_rows - 4)) - elif len(self.index) > 0: - result = self._get_repr(print_header=True, - length=len(self) > 50, - name=True, - dtype=True) - elif self.name is None: - result = u('Series([], dtype: %s)') % (self.dtype) - else: - result = u('Series([], name: %s, dtype: %s)') % (self.name, - self.dtype) - return result - def _tidy_repr(self, max_vals=20): - """ + self.to_string(buf=buf, name=self.name, dtype=self.dtype, + max_rows=max_rows) + result = buf.getvalue() - Internal function, should always return unicode string - """ - if max_vals > 1: - num = max_vals // 2 - else: - num = 1 - max_vals = 2 - head = self.iloc[:num]._get_repr(print_header=True, length=False, - dtype=False, name=False) - tail = self.iloc[-(max_vals - num):]._get_repr(print_header=False, - length=False, - name=False, - dtype=False) - result = head + '\n...\n' + tail - result = '%s\n%s' % (result, self._repr_footer()) - - return compat.text_type(result) + return result def _repr_footer(self): @@ -948,8 +921,8 @@ def _repr_footer(self): len(self), str(self.dtype.name)) - def to_string(self, buf=None, na_rep='NaN', float_format=None, - length=False, dtype=False, name=False): + def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, + length=False, dtype=False, name=False, max_rows=None): """ Render a string representation of the Series @@ -962,12 +935,17 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, float_format : one-parameter function, optional formatter function to apply to columns' elements if they are floats default None + header: boolean, default True + Add the Series header (index name) length : boolean, default False Add the Series length dtype : boolean, default False Add the Series dtype name : boolean, default 
False - Add the Series name (which may be None) + Add the Series name if not None + max_rows : int, optional + Maximum number of rows to show before truncating. If None, show + all. Returns ------- @@ -975,7 +953,8 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, """ the_repr = self._get_repr(float_format=float_format, na_rep=na_rep, - length=length, dtype=dtype, name=name) + header=header, length=length, dtype=dtype, + name=name, max_rows=max_rows) # catch contract violations if not isinstance(the_repr, compat.text_type): @@ -993,17 +972,18 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, f.write(the_repr) def _get_repr( - self, name=False, print_header=False, length=True, dtype=True, - na_rep='NaN', float_format=None): + self, name=False, header=True, length=True, dtype=True, na_rep='NaN', + float_format=None, max_rows=None): """ Internal function, should always return unicode string """ - - formatter = fmt.SeriesFormatter(self, name=name, header=print_header, - length=length, dtype=dtype, + formatter = fmt.SeriesFormatter(self, name=name, + length=length, header=header, + dtype=dtype, na_rep=na_rep, - float_format=float_format) + float_format=float_format, + max_rows=max_rows) result = formatter.to_string() # TODO: following check prob. not neces. 
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index cd78fd22e64ca..7f4b3fcb94dfa 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -13,6 +13,7 @@ from pandas import Categorical, Index, Series, DataFrame, PeriodIndex, Timestamp +from pandas.core.config import option_context import pandas.core.common as com import pandas.compat as compat import pandas.util.testing as tm @@ -1559,12 +1560,12 @@ def test_repr(self): self.assertEqual(exp, a.__unicode__()) - a = pd.Series(pd.Categorical(["a","b"] *25, name="a", ordered=True)) - exp = u("".join(["%s a\n%s b\n"%(i,i+1) for i in range(0,10,2)]) + "...\n" + - "".join(["%s a\n%s b\n"%(i,i+1) for i in range(40,50,2)]) + - "Name: a, Length: 50, dtype: category\n" + - "Categories (2, object): [a < b]") - self.assertEqual(exp,a._tidy_repr()) + a = pd.Series(pd.Categorical(["a","b"] *25, name="a")) + exp = u("0 a\n1 b\n" + " ..\n" + + "48 a\n49 b\n" + + "Name: a, dtype: category\nCategories (2, object): [a, b]") + with option_context("display.max_rows", 5): + self.assertEqual(exp, repr(a)) levs = list("abcdefghijklmnopqrstuvwxyz") a = pd.Series(pd.Categorical(["a","b"], name="a", categories=levs, ordered=True)) @@ -1573,7 +1574,6 @@ def test_repr(self): "Categories (26, object): [a < b < c < d ... 
w < x < y < z]") self.assertEqual(exp,a.__unicode__()) - def test_info(self): # make sure it works diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index b52e4f7e3947b..94a7dd4dd9e87 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -2438,16 +2438,16 @@ def test_to_string(self): # pass float_format format = '%.4f'.__mod__ result = self.ts.to_string(float_format=format) - result = [x.split()[1] for x in result.split('\n')] + result = [x.split()[1] for x in result.split('\n')[:-1]] expected = [format(x) for x in self.ts] self.assertEqual(result, expected) # empty string result = self.ts[:0].to_string() - self.assertEqual(result, '') + self.assertEqual(result, 'Series([], Freq: B)') result = self.ts[:0].to_string(length=0) - self.assertEqual(result, '') + self.assertEqual(result, 'Series([], Freq: B)') # name and length cp = self.ts.copy() @@ -2623,7 +2623,7 @@ def test_max_multi_index_display(self): with option_context("display.max_rows", 2): self.assertEqual(len(str(s).split('\n')),5) with option_context("display.max_rows", 1): - self.assertEqual(len(str(s).split('\n')),5) + self.assertEqual(len(str(s).split('\n')),4) with option_context("display.max_rows", 0): self.assertEqual(len(str(s).split('\n')),10) @@ -2637,10 +2637,137 @@ def test_max_multi_index_display(self): with option_context("display.max_rows", 2): self.assertEqual(len(str(s).split('\n')),4) with option_context("display.max_rows", 1): - self.assertEqual(len(str(s).split('\n')),4) + self.assertEqual(len(str(s).split('\n')),3) with option_context("display.max_rows", 0): self.assertEqual(len(str(s).split('\n')),9) + # Make sure #8532 is fixed + def test_consistent_format(self): + s = pd.Series([1,1,1,1,1,1,1,1,1,1,0.9999,1,1]*10) + with option_context("display.max_rows", 10): + res = repr(s) + exp = ('0 1.0000\n1 1.0000\n2 1.0000\n3 ' + '1.0000\n4 1.0000\n ... 
\n125 ' + '1.0000\n126 1.0000\n127 0.9999\n128 ' + '1.0000\n129 1.0000\ndtype: float64') + self.assertEqual(res, exp) + + @staticmethod + def gen_test_series(): + s1 = pd.Series(['a']*100) + s2 = pd.Series(['ab']*100) + s3 = pd.Series(['a', 'ab', 'abc', 'abcd', 'abcde', 'abcdef']) + s4 = s3[::-1] + test_sers = {'onel': s1, 'twol': s2, 'asc': s3, 'desc': s4} + return test_sers + + def chck_ncols(self, s): + with option_context("display.max_rows", 10): + res = repr(s) + lines = res.split('\n') + lines = [line for line in repr(s).split('\n') \ + if not re.match('[^\.]*\.+', line)][:-1] + ncolsizes = len(set(len(line.strip()) for line in lines)) + self.assertEqual(ncolsizes, 1) + + def test_format_explicit(self): + test_sers = self.gen_test_series() + with option_context("display.max_rows", 4): + res = repr(test_sers['onel']) + exp = '0 a\n1 a\n ..\n98 a\n99 a\ndtype: object' + self.assertEqual(exp, res) + res = repr(test_sers['twol']) + exp = ('0 ab\n1 ab\n ..\n98 ab\n99 ab\ndtype:' + ' object') + self.assertEqual(exp, res) + res = repr(test_sers['asc']) + exp = ('0 a\n1 ab\n ... \n4 abcde\n5' + ' abcdef\ndtype: object') + self.assertEqual(exp, res) + res = repr(test_sers['desc']) + exp = ('5 abcdef\n4 abcde\n ... 
\n1 ab\n0' + ' a\ndtype: object') + self.assertEqual(exp, res) + + def test_ncols(self): + test_sers = self.gen_test_series() + for s in test_sers.values(): + self.chck_ncols(s) + + def test_max_rows_eq_one(self): + s = Series(range(10)) + with option_context("display.max_rows", 1): + strrepr = repr(s).split('\n') + exp1 = ['0', '0'] + res1 = strrepr[0].split() + self.assertEqual(exp1, res1) + exp2 = ['..'] + res2 = strrepr[1].split() + self.assertEqual(exp2, res2) + + def test_truncate_ndots(self): + def getndots(s): + return len(re.match('[^\.]*(\.*)', s).groups()[0]) + + s = Series([0, 2, 3, 6]) + with option_context("display.max_rows", 2): + strrepr = repr(s).replace('\n', '') + self.assertEqual(getndots(strrepr), 2) + + s = Series([0, 100, 200, 400]) + with option_context("display.max_rows", 2): + strrepr = repr(s).replace('\n', '') + self.assertEqual(getndots(strrepr), 3) + + def test_to_string_name(self): + s = Series(range(100)) + s.name = 'myser' + res = s.to_string(max_rows=2, name=True) + exp = '0 0\n ..\n99 99\nName: myser' + self.assertEqual(res, exp) + res = s.to_string(max_rows=2, name=False) + exp = '0 0\n ..\n99 99' + self.assertEqual(res, exp) + + def test_to_string_dtype(self): + s = Series(range(100)) + res = s.to_string(max_rows=2, dtype=True) + exp = '0 0\n ..\n99 99\ndtype: int64' + self.assertEqual(res, exp) + res = s.to_string(max_rows=2, dtype=False) + exp = '0 0\n ..\n99 99' + self.assertEqual(res, exp) + + def test_to_string_length(self): + s = Series(range(100)) + res = s.to_string(max_rows=2, length=True) + exp = '0 0\n ..\n99 99\nLength: 100' + self.assertEqual(res, exp) + + def test_to_string_na_rep(self): + s = pd.Series(index=range(100)) + res = s.to_string(na_rep='foo', max_rows=2) + exp = '0 foo\n ..\n99 foo' + self.assertEqual(res, exp) + + def test_to_string_float_format(self): + s = pd.Series(range(10), dtype=float) + res = s.to_string(float_format=lambda x: '{0:2.1f}'.format(x), + max_rows=2) + exp = '0 0.0\n ..\n9 9.0' + 
self.assertEqual(res, exp) + + def test_to_string_header(self): + s = pd.Series(range(10)) + s.index.name = 'foo' + res = s.to_string(header=True, max_rows=2) + exp = 'foo\n0 0\n ..\n9 9' + self.assertEqual(res, exp) + res = s.to_string(header=False, max_rows=2) + exp = '0 0\n ..\n9 9' + self.assertEqual(res, exp) + + class TestEngFormatter(tm.TestCase): _multiprocess_can_split_ = True diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 7e0dbaa735456..ae2ed4eaca2f4 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2046,7 +2046,7 @@ def test_repr(self): # with empty series (#4651) s = Series([], dtype=np.int64, name='foo') - self.assertEqual(repr(s), 'Series([], name: foo, dtype: int64)') + self.assertEqual(repr(s), 'Series([], Name: foo, dtype: int64)') s = Series([], dtype=np.int64, name=None) self.assertEqual(repr(s), 'Series([], dtype: int64)')