Skip to content

Commit 82f097d

Browse files
committed
formatting MultiIndex
1 parent cec73d0 commit 82f097d

File tree

8 files changed

+133
-157
lines changed

8 files changed

+133
-157
lines changed

doc/source/whatsnew/v0.16.1.txt

+45
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,51 @@ API changes
7979
the order was arbitrary. (:issue:`9777`)
8080

8181

82+
.. _whatsnew_0161.index_repr:
83+
84+
Index Representation
85+
~~~~~~~~~~~~~~~~~~~~
86+
87+
The string representation of ``Index`` and its sub-classes has now been unified. These are all uniform in their output
88+
formats, except for ``MultiIndex``, which has a multi-line repr. The display width responds to the option ``display.max_seq_items``,
89+
which now defaults to 10 (previously 100). (:issue:`6482`)
90+
91+
Previous Behavior
92+
93+
.. code-block:: python
94+
95+
96+
In [1]: pd.get_option('max_seq_items')
97+
Out[1]: 100
98+
99+
In [2]: pd.Index(range(4),name='foo')
100+
Out[2]: Int64Index([0, 1, 2, 3], dtype='int64')
101+
102+
In [3]: pd.Index(range(104),name='foo')
103+
Out[3]: Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, ...], dtype='int64')
104+
105+
In [4]: pd.date_range('20130101',periods=4,name='foo',tz='US/Eastern')
106+
Out[4]:
107+
<class 'pandas.tseries.index.DatetimeIndex'>
108+
[2013-01-01 00:00:00-05:00, ..., 2013-01-04 00:00:00-05:00]
109+
Length: 4, Freq: D, Timezone: US/Eastern
110+
111+
In [5]: pd.date_range('20130101',periods=104,name='foo',tz='US/Eastern')
112+
Out[5]:
113+
<class 'pandas.tseries.index.DatetimeIndex'>
114+
[2013-01-01 00:00:00-05:00, ..., 2013-04-14 00:00:00-04:00]
115+
Length: 104, Freq: D, Timezone: US/Eastern
116+
117+
New Behavior
118+
119+
.. ipython:: python
120+
121+
pd.get_option('max_seq_items')
122+
pd.Index(range(4),name='foo')
123+
pd.Index(range(104),name='foo')
124+
pd.date_range('20130101',periods=4,name='foo',tz='US/Eastern')
125+
pd.date_range('20130101',periods=104,name='foo',tz='US/Eastern')
126+
82127
.. _whatsnew_0161.performance:
83128

84129
Performance Improvements

pandas/core/config_init.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ def mpl_style_cb(key):
269269
cf.register_option('show_dimensions', 'truncate', pc_show_dimensions_doc,
270270
validator=is_one_of_factory([True, False, 'truncate']))
271271
cf.register_option('chop_threshold', None, pc_chop_threshold_doc)
272-
cf.register_option('max_seq_items', 100, pc_max_seq_items)
272+
cf.register_option('max_seq_items', 10, pc_max_seq_items)
273273
cf.register_option('mpl_style', None, pc_mpl_style_doc,
274274
validator=is_one_of_factory([None, False, 'default']),
275275
cb=mpl_style_cb)

pandas/core/index.py

+35-37
Original file line numberDiff line numberDiff line change
@@ -413,21 +413,33 @@ def __unicode__(self):
413413
klass = self.__class__.__name__
414414
data = self._format_data()
415415
attrs = self._format_attrs()
416-
max_seq_items = get_option('display.max_seq_items')
417-
if len(self) > max_seq_items:
418-
space = "\n%s" % (' ' * (len(klass) + 1))
419-
else:
420-
space = " "
416+
space = self._format_space()
421417

422418
prepr = (u(",%s") % space).join([u("%s=%s") % (k, v)
423419
for k, v in attrs])
424-
res = u("%s(%s,%s%s)") % (klass,
425-
data,
426-
space,
427-
prepr)
420+
421+
# no data provided, just attributes
422+
if data is None:
423+
data = ''
424+
else:
425+
data = "%s,%s" % (data, space)
426+
427+
res = u("%s(%s%s)") % (klass,
428+
data,
429+
prepr)
428430

429431
return res
430432

433+
def _format_space(self):
434+
435+
# using space here controls if the attributes
436+
# are line separated or not (the default)
437+
438+
#max_seq_items = get_option('display.max_seq_items')
439+
#if len(self) > max_seq_items:
440+
# space = "\n%s" % (' ' * (len(klass) + 1))
441+
return " "
442+
431443
@property
432444
def _formatter_func(self):
433445
"""
@@ -439,7 +451,6 @@ def _format_data(self):
439451
"""
440452
Return the formatted data as a unicode string
441453
"""
442-
443454
max_seq_items = get_option('display.max_seq_items')
444455
formatter = self._formatter_func
445456
n = len(self)
@@ -468,9 +479,12 @@ def _format_attrs(self):
468479
Return a list of tuples of the (attr,formatted_value)
469480
"""
470481
attrs = []
482+
attrs.append(('dtype',"'%s'" % self.dtype))
471483
if self.name is not None:
472484
attrs.append(('name',default_pprint(self.name)))
473-
attrs.append(('dtype',"'%s'" % self.dtype))
485+
max_seq_items = get_option('display.max_seq_items')
486+
if len(self) > max_seq_items:
487+
attrs.append(('length',len(self)))
474488
return attrs
475489

476490
def to_series(self, **kwargs):
@@ -3280,40 +3294,24 @@ def nbytes(self):
32803294
names_nbytes = sum(( getsizeof(i) for i in self.names ))
32813295
return level_nbytes + label_nbytes + names_nbytes
32823296

3283-
def __repr__(self):
3284-
encoding = get_option('display.encoding')
3297+
def _format_attrs(self):
3298+
"""
3299+
Return a list of tuples of the (attr,formatted_value)
3300+
"""
32853301
attrs = [('levels', default_pprint(self.levels)),
32863302
('labels', default_pprint(self.labels))]
32873303
if not all(name is None for name in self.names):
32883304
attrs.append(('names', default_pprint(self.names)))
32893305
if self.sortorder is not None:
32903306
attrs.append(('sortorder', default_pprint(self.sortorder)))
3307+
return attrs
32913308

3292-
space = ' ' * (len(self.__class__.__name__) + 1)
3293-
prepr = (u(",\n%s") % space).join([u("%s=%s") % (k, v)
3294-
for k, v in attrs])
3295-
res = u("%s(%s)") % (self.__class__.__name__, prepr)
3296-
3297-
if not compat.PY3:
3298-
# needs to be str in Python 2
3299-
res = res.encode(encoding)
3300-
return res
3309+
def _format_space(self):
3310+
return "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
33013311

3302-
def __unicode__(self):
3303-
"""
3304-
Return a string representation for a particular Index
3305-
3306-
Invoked by unicode(df) in py2 only. Yields a Unicode String in both
3307-
py2/py3.
3308-
"""
3309-
rows = self.format(names=True)
3310-
max_rows = get_option('display.max_rows')
3311-
if len(rows) > max_rows:
3312-
spaces = (len(rows[0]) - 3) // 2
3313-
centered = ' ' * spaces
3314-
half = max_rows // 2
3315-
rows = rows[:half] + [centered + '...' + centered] + rows[-half:]
3316-
return "\n".join(rows)
3312+
def _format_data(self):
3313+
# we are formatting thru the attributes
3314+
return None
33173315

33183316
def __len__(self):
33193317
return len(self.labels[0])

pandas/tests/test_format.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -3010,12 +3010,12 @@ def test_format(self):
30103010

30113011
def test_output_significant_digits(self):
30123012
# Issue #9764
3013-
3013+
30143014
# In case default display precision changes:
30153015
with pd.option_context('display.precision', 7):
30163016
# DataFrame example from issue #9764
30173017
d=pd.DataFrame({'col1':[9.999e-8, 1e-7, 1.0001e-7, 2e-7, 4.999e-7, 5e-7, 5.0001e-7, 6e-7, 9.999e-7, 1e-6, 1.0001e-6, 2e-6, 4.999e-6, 5e-6, 5.0001e-6, 6e-6]})
3018-
3018+
30193019
expected_output={
30203020
(0,6):' col1\n0 9.999000e-08\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07',
30213021
(1,6):' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07',
@@ -3200,13 +3200,13 @@ def test_date_explict_date_format(self):
32003200
class TestDatetimeIndexUnicode(tm.TestCase):
32013201
def test_dates(self):
32023202
text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1)]))
3203-
self.assertTrue("[2013-01-01," in text)
3204-
self.assertTrue(", 2014-01-01]" in text)
3203+
self.assertTrue("['2013-01-01'," in text)
3204+
self.assertTrue(", '2014-01-01']" in text)
32053205

32063206
def test_mixed(self):
32073207
text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1,12), datetime(2014,1,1)]))
3208-
self.assertTrue("[2013-01-01 00:00:00," in text)
3209-
self.assertTrue(", 2014-01-01 00:00:00]" in text)
3208+
self.assertTrue("['2013-01-01 00:00:00'," in text)
3209+
self.assertTrue(", '2014-01-01 00:00:00']" in text)
32103210

32113211

32123212
class TestStringRepTimestamp(tm.TestCase):

pandas/tests/test_index.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -2064,16 +2064,13 @@ def test_slice_keep_name(self):
20642064

20652065
class DatetimeLike(Base):
20662066

2067-
def test_repr_roundtrip(self):
2068-
raise nose.SkipTest("Short reprs are not supported repr for Datetimelike indexes")
2069-
20702067
def test_str(self):
20712068

20722069
# test the string repr
20732070
idx = self.create_index()
20742071
idx.name = 'foo'
2075-
self.assertTrue("length=%s" % len(idx) in str(idx))
2076-
self.assertTrue("u'foo'" in str(idx))
2072+
self.assertFalse("length=%s" % len(idx) in str(idx))
2073+
self.assertTrue("'foo'" in str(idx))
20772074
self.assertTrue(idx.__class__.__name__ in str(idx))
20782075

20792076
if hasattr(idx,'tz'):

pandas/tseries/base.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -255,17 +255,13 @@ def argmax(self, axis=None):
255255

256256
@property
257257
def _formatter_func(self):
258-
"""
259-
Format function to convert value to representation
260-
"""
261-
return str
258+
raise AbstractMethodError(self)
262259

263260
def _format_attrs(self):
264261
"""
265262
Return a list of tuples of the (attr,formatted_value)
266263
"""
267264
attrs = super(DatetimeIndexOpsMixin, self)._format_attrs()
268-
attrs.append(('length',len(self)))
269265
for attrib in self._attributes:
270266
if attrib == 'freq':
271267
freq = self.freqstr
@@ -499,4 +495,6 @@ def summary(self, name=None):
499495
if self.freq:
500496
result += '\nFreq: %s' % self.freqstr
501497

498+
# display as values, not quoted
499+
result = result.replace("'","")
502500
return result

pandas/tseries/period.py

+4
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,10 @@ def _to_embed(self, keep_tz=False):
293293
""" return an array repr of this object, potentially casting to object """
294294
return self.asobject.values
295295

296+
@property
297+
def _formatter_func(self):
298+
return lambda x: "'%s'" % x
299+
296300
def asof_locs(self, where, mask):
297301
"""
298302
where : array of timestamps

0 commit comments

Comments
 (0)