Skip to content

Commit d1f0f01

Browse files
committed
formatting MultiIndex
1 parent afe9c6b commit d1f0f01

File tree

8 files changed

+131
-155
lines changed

8 files changed

+131
-155
lines changed

doc/source/whatsnew/v0.16.1.txt

+45
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,51 @@ API changes
249249

250250
- By default, ``read_csv`` and ``read_table`` will now try to infer the compression type based on the file extension. Set ``compression=None`` to restore the previous behavior (no decompression). (:issue:`9770`)
251251

252+
.. _whatsnew_0161.index_repr:
253+
254+
Index Representation
255+
~~~~~~~~~~~~~~~~~~~~
256+
257+
The string representation of ``Index`` and its sub-classes has now been unified. These are all uniform in their output
258+
formats, except for ``MultiIndex``, which has a multi-line repr. The display width responds to the option ``display.max_seq_items``,
259+
which now defaults to 10 (previously 100). (:issue:`6482`)
260+
261+
Previous Behavior
262+
263+
.. code-block:: python
264+
265+
266+
In [1]: pd.get_option('max_seq_items')
267+
Out[1]: 100
268+
269+
In [2]: pd.Index(range(4),name='foo')
270+
Out[2]: Int64Index([0, 1, 2, 3], dtype='int64')
271+
272+
In [3]: pd.Index(range(104),name='foo')
273+
Out[3]: Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, ...], dtype='int64')
274+
275+
In [4]: pd.date_range('20130101',periods=4,name='foo',tz='US/Eastern')
276+
Out[4]:
277+
<class 'pandas.tseries.index.DatetimeIndex'>
278+
[2013-01-01 00:00:00-05:00, ..., 2013-01-04 00:00:00-05:00]
279+
Length: 4, Freq: D, Timezone: US/Eastern
280+
281+
In [5]: pd.date_range('20130101',periods=104,name='foo',tz='US/Eastern')
282+
Out[5]:
283+
<class 'pandas.tseries.index.DatetimeIndex'>
284+
[2013-01-01 00:00:00-05:00, ..., 2013-04-14 00:00:00-04:00]
285+
Length: 104, Freq: D, Timezone: US/Eastern
286+
287+
New Behavior
288+
289+
.. ipython:: python
290+
291+
pd.get_option('max_seq_items')
292+
pd.Index(range(4),name='foo')
293+
pd.Index(range(104),name='foo')
294+
pd.date_range('20130101',periods=4,name='foo',tz='US/Eastern')
295+
pd.date_range('20130101',periods=104,name='foo',tz='US/Eastern')
296+
252297
.. _whatsnew_0161.performance:
253298

254299
Performance Improvements

pandas/core/config_init.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ def mpl_style_cb(key):
269269
cf.register_option('show_dimensions', 'truncate', pc_show_dimensions_doc,
270270
validator=is_one_of_factory([True, False, 'truncate']))
271271
cf.register_option('chop_threshold', None, pc_chop_threshold_doc)
272-
cf.register_option('max_seq_items', 100, pc_max_seq_items)
272+
cf.register_option('max_seq_items', 10, pc_max_seq_items)
273273
cf.register_option('mpl_style', None, pc_mpl_style_doc,
274274
validator=is_one_of_factory([None, False, 'default']),
275275
cb=mpl_style_cb)

pandas/core/index.py

+35-37
Original file line numberDiff line numberDiff line change
@@ -395,21 +395,33 @@ def __unicode__(self):
395395
klass = self.__class__.__name__
396396
data = self._format_data()
397397
attrs = self._format_attrs()
398-
max_seq_items = get_option('display.max_seq_items')
399-
if len(self) > max_seq_items:
400-
space = "\n%s" % (' ' * (len(klass) + 1))
401-
else:
402-
space = " "
398+
space = self._format_space()
403399

404400
prepr = (u(",%s") % space).join([u("%s=%s") % (k, v)
405401
for k, v in attrs])
406-
res = u("%s(%s,%s%s)") % (klass,
407-
data,
408-
space,
409-
prepr)
402+
403+
# no data provided, just attributes
404+
if data is None:
405+
data = ''
406+
else:
407+
data = "%s,%s" % (data, space)
408+
409+
res = u("%s(%s%s)") % (klass,
410+
data,
411+
prepr)
410412

411413
return res
412414

415+
def _format_space(self):
416+
417+
# using space here controls if the attributes
418+
# are line separated or not (the default)
419+
420+
#max_seq_items = get_option('display.max_seq_items')
421+
#if len(self) > max_seq_items:
422+
# space = "\n%s" % (' ' * (len(klass) + 1))
423+
return " "
424+
413425
@property
414426
def _formatter_func(self):
415427
"""
@@ -421,7 +433,6 @@ def _format_data(self):
421433
"""
422434
Return the formatted data as a unicode string
423435
"""
424-
425436
max_seq_items = get_option('display.max_seq_items')
426437
formatter = self._formatter_func
427438
n = len(self)
@@ -450,9 +461,12 @@ def _format_attrs(self):
450461
Return a list of tuples of the (attr,formatted_value)
451462
"""
452463
attrs = []
464+
attrs.append(('dtype',"'%s'" % self.dtype))
453465
if self.name is not None:
454466
attrs.append(('name',default_pprint(self.name)))
455-
attrs.append(('dtype',"'%s'" % self.dtype))
467+
max_seq_items = get_option('display.max_seq_items')
468+
if len(self) > max_seq_items:
469+
attrs.append(('length',len(self)))
456470
return attrs
457471

458472
def to_series(self, **kwargs):
@@ -3937,40 +3951,24 @@ def nbytes(self):
39373951
names_nbytes = sum(( getsizeof(i) for i in self.names ))
39383952
return level_nbytes + label_nbytes + names_nbytes
39393953

3940-
def __repr__(self):
3941-
encoding = get_option('display.encoding')
3954+
def _format_attrs(self):
3955+
"""
3956+
Return a list of tuples of the (attr,formatted_value)
3957+
"""
39423958
attrs = [('levels', default_pprint(self.levels)),
39433959
('labels', default_pprint(self.labels))]
39443960
if not all(name is None for name in self.names):
39453961
attrs.append(('names', default_pprint(self.names)))
39463962
if self.sortorder is not None:
39473963
attrs.append(('sortorder', default_pprint(self.sortorder)))
3964+
return attrs
39483965

3949-
space = ' ' * (len(self.__class__.__name__) + 1)
3950-
prepr = (u(",\n%s") % space).join([u("%s=%s") % (k, v)
3951-
for k, v in attrs])
3952-
res = u("%s(%s)") % (self.__class__.__name__, prepr)
3953-
3954-
if not compat.PY3:
3955-
# needs to be str in Python 2
3956-
res = res.encode(encoding)
3957-
return res
3958-
3959-
def __unicode__(self):
3960-
"""
3961-
Return a string representation for a particular Index
3966+
def _format_space(self):
3967+
return "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
39623968

3963-
Invoked by unicode(df) in py2 only. Yields a Unicode String in both
3964-
py2/py3.
3965-
"""
3966-
rows = self.format(names=True)
3967-
max_rows = get_option('display.max_rows')
3968-
if len(rows) > max_rows:
3969-
spaces = (len(rows[0]) - 3) // 2
3970-
centered = ' ' * spaces
3971-
half = max_rows // 2
3972-
rows = rows[:half] + [centered + '...' + centered] + rows[-half:]
3973-
return "\n".join(rows)
3969+
def _format_data(self):
3970+
# we are formatting thru the attributes
3971+
return None
39743972

39753973
def __len__(self):
39763974
return len(self.labels[0])

pandas/tests/test_format.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -3215,13 +3215,13 @@ def test_date_explict_date_format(self):
32153215
class TestDatetimeIndexUnicode(tm.TestCase):
32163216
def test_dates(self):
32173217
text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1)]))
3218-
self.assertTrue("[2013-01-01," in text)
3219-
self.assertTrue(", 2014-01-01]" in text)
3218+
self.assertTrue("['2013-01-01'," in text)
3219+
self.assertTrue(", '2014-01-01']" in text)
32203220

32213221
def test_mixed(self):
32223222
text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1,12), datetime(2014,1,1)]))
3223-
self.assertTrue("[2013-01-01 00:00:00," in text)
3224-
self.assertTrue(", 2014-01-01 00:00:00]" in text)
3223+
self.assertTrue("['2013-01-01 00:00:00'," in text)
3224+
self.assertTrue(", '2014-01-01 00:00:00']" in text)
32253225

32263226

32273227
class TestStringRepTimestamp(tm.TestCase):

pandas/tests/test_index.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -2487,16 +2487,13 @@ def test_slice_keep_name(self):
24872487

24882488
class DatetimeLike(Base):
24892489

2490-
def test_repr_roundtrip(self):
2491-
raise nose.SkipTest("Short reprs are not supported repr for Datetimelike indexes")
2492-
24932490
def test_str(self):
24942491

24952492
# test the string repr
24962493
idx = self.create_index()
24972494
idx.name = 'foo'
2498-
self.assertTrue("length=%s" % len(idx) in str(idx))
2499-
self.assertTrue("u'foo'" in str(idx))
2495+
self.assertFalse("length=%s" % len(idx) in str(idx))
2496+
self.assertTrue("'foo'" in str(idx))
25002497
self.assertTrue(idx.__class__.__name__ in str(idx))
25012498

25022499
if hasattr(idx,'tz'):

pandas/tseries/base.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -260,17 +260,13 @@ def argmax(self, axis=None):
260260

261261
@property
262262
def _formatter_func(self):
263-
"""
264-
Format function to convert value to representation
265-
"""
266-
return str
263+
raise AbstractMethodError(self)
267264

268265
def _format_attrs(self):
269266
"""
270267
Return a list of tuples of the (attr,formatted_value)
271268
"""
272269
attrs = super(DatetimeIndexOpsMixin, self)._format_attrs()
273-
attrs.append(('length',len(self)))
274270
for attrib in self._attributes:
275271
if attrib == 'freq':
276272
freq = self.freqstr
@@ -504,4 +500,6 @@ def summary(self, name=None):
504500
if self.freq:
505501
result += '\nFreq: %s' % self.freqstr
506502

503+
# display as values, not quoted
504+
result = result.replace("'","")
507505
return result

pandas/tseries/period.py

+4
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,10 @@ def _to_embed(self, keep_tz=False):
293293
""" return an array repr of this object, potentially casting to object """
294294
return self.asobject.values
295295

296+
@property
297+
def _formatter_func(self):
298+
return lambda x: "'%s'" % x
299+
296300
def asof_locs(self, where, mask):
297301
"""
298302
where : array of timestamps

0 commit comments

Comments
 (0)