Skip to content

Commit d994bb7

Browse files
committed
formatting MultiIndex
1 parent f0e07f5 commit d994bb7

File tree

8 files changed

+131
-155
lines changed

8 files changed

+131
-155
lines changed

doc/source/whatsnew/v0.16.1.txt

+45
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,51 @@ API changes
207207

208208
- By default, ``read_csv`` and ``read_table`` will now try to infer the compression type based on the file extension. Set ``compression=None`` to restore the previous behavior (no decompression). (:issue:`9770`)
209209

210+
.. _whatsnew_0161.index_repr:
211+
212+
Index Representation
213+
~~~~~~~~~~~~~~~~~~~~
214+
215+
The string representation of ``Index`` and its sub-classes has now been unified. These are all uniform in their output
216+
formats, except for ``MultiIndex``, which has a multi-line repr. The display width responds to the option ``display.max_seq_items``,
217+
which now defaults to 10 (previously 100). (:issue:`6482`)
218+
219+
Previous Behavior
220+
221+
.. code-block:: python
222+
223+
224+
In [1]: pd.get_option('max_seq_items')
225+
Out[1]: 100
226+
227+
In [2]: pd.Index(range(4),name='foo')
228+
Out[2]: Int64Index([0, 1, 2, 3], dtype='int64')
229+
230+
In [3]: pd.Index(range(104),name='foo')
231+
Out[3]: Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, ...], dtype='int64')
232+
233+
In [4]: pd.date_range('20130101',periods=4,name='foo',tz='US/Eastern')
234+
Out[4]:
235+
<class 'pandas.tseries.index.DatetimeIndex'>
236+
[2013-01-01 00:00:00-05:00, ..., 2013-01-04 00:00:00-05:00]
237+
Length: 4, Freq: D, Timezone: US/Eastern
238+
239+
In [5]: pd.date_range('20130101',periods=104,name='foo',tz='US/Eastern')
240+
Out[5]:
241+
<class 'pandas.tseries.index.DatetimeIndex'>
242+
[2013-01-01 00:00:00-05:00, ..., 2013-04-14 00:00:00-04:00]
243+
Length: 104, Freq: D, Timezone: US/Eastern
244+
245+
New Behavior
246+
247+
.. ipython:: python
248+
249+
pd.get_option('max_seq_items')
250+
pd.Index(range(4),name='foo')
251+
pd.Index(range(104),name='foo')
252+
pd.date_range('20130101',periods=4,name='foo',tz='US/Eastern')
253+
pd.date_range('20130101',periods=104,name='foo',tz='US/Eastern')
254+
210255
.. _whatsnew_0161.performance:
211256

212257
Performance Improvements

pandas/core/config_init.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ def mpl_style_cb(key):
269269
cf.register_option('show_dimensions', 'truncate', pc_show_dimensions_doc,
270270
validator=is_one_of_factory([True, False, 'truncate']))
271271
cf.register_option('chop_threshold', None, pc_chop_threshold_doc)
272-
cf.register_option('max_seq_items', 100, pc_max_seq_items)
272+
cf.register_option('max_seq_items', 10, pc_max_seq_items)
273273
cf.register_option('mpl_style', None, pc_mpl_style_doc,
274274
validator=is_one_of_factory([None, False, 'default']),
275275
cb=mpl_style_cb)

pandas/core/index.py

+35-37
Original file line numberDiff line numberDiff line change
@@ -395,21 +395,33 @@ def __unicode__(self):
395395
klass = self.__class__.__name__
396396
data = self._format_data()
397397
attrs = self._format_attrs()
398-
max_seq_items = get_option('display.max_seq_items')
399-
if len(self) > max_seq_items:
400-
space = "\n%s" % (' ' * (len(klass) + 1))
401-
else:
402-
space = " "
398+
space = self._format_space()
403399

404400
prepr = (u(",%s") % space).join([u("%s=%s") % (k, v)
405401
for k, v in attrs])
406-
res = u("%s(%s,%s%s)") % (klass,
407-
data,
408-
space,
409-
prepr)
402+
403+
# no data provided, just attributes
404+
if data is None:
405+
data = ''
406+
else:
407+
data = "%s,%s" % (data, space)
408+
409+
res = u("%s(%s%s)") % (klass,
410+
data,
411+
prepr)
410412

411413
return res
412414

415+
def _format_space(self):
416+
417+
# using space here controls if the attributes
418+
# are line separated or not (the default)
419+
420+
#max_seq_items = get_option('display.max_seq_items')
421+
#if len(self) > max_seq_items:
422+
# space = "\n%s" % (' ' * (len(klass) + 1))
423+
return " "
424+
413425
@property
414426
def _formatter_func(self):
415427
"""
@@ -421,7 +433,6 @@ def _format_data(self):
421433
"""
422434
Return the formatted data as a unicode string
423435
"""
424-
425436
max_seq_items = get_option('display.max_seq_items')
426437
formatter = self._formatter_func
427438
n = len(self)
@@ -450,9 +461,12 @@ def _format_attrs(self):
450461
Return a list of tuples of the (attr,formatted_value)
451462
"""
452463
attrs = []
464+
attrs.append(('dtype',"'%s'" % self.dtype))
453465
if self.name is not None:
454466
attrs.append(('name',default_pprint(self.name)))
455-
attrs.append(('dtype',"'%s'" % self.dtype))
467+
max_seq_items = get_option('display.max_seq_items')
468+
if len(self) > max_seq_items:
469+
attrs.append(('length',len(self)))
456470
return attrs
457471

458472
def to_series(self, **kwargs):
@@ -3931,40 +3945,24 @@ def nbytes(self):
39313945
names_nbytes = sum(( getsizeof(i) for i in self.names ))
39323946
return level_nbytes + label_nbytes + names_nbytes
39333947

3934-
def __repr__(self):
3935-
encoding = get_option('display.encoding')
3948+
def _format_attrs(self):
3949+
"""
3950+
Return a list of tuples of the (attr,formatted_value)
3951+
"""
39363952
attrs = [('levels', default_pprint(self.levels)),
39373953
('labels', default_pprint(self.labels))]
39383954
if not all(name is None for name in self.names):
39393955
attrs.append(('names', default_pprint(self.names)))
39403956
if self.sortorder is not None:
39413957
attrs.append(('sortorder', default_pprint(self.sortorder)))
3958+
return attrs
39423959

3943-
space = ' ' * (len(self.__class__.__name__) + 1)
3944-
prepr = (u(",\n%s") % space).join([u("%s=%s") % (k, v)
3945-
for k, v in attrs])
3946-
res = u("%s(%s)") % (self.__class__.__name__, prepr)
3947-
3948-
if not compat.PY3:
3949-
# needs to be str in Python 2
3950-
res = res.encode(encoding)
3951-
return res
3952-
3953-
def __unicode__(self):
3954-
"""
3955-
Return a string representation for a particular Index
3960+
def _format_space(self):
3961+
return "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
39563962

3957-
Invoked by unicode(df) in py2 only. Yields a Unicode String in both
3958-
py2/py3.
3959-
"""
3960-
rows = self.format(names=True)
3961-
max_rows = get_option('display.max_rows')
3962-
if len(rows) > max_rows:
3963-
spaces = (len(rows[0]) - 3) // 2
3964-
centered = ' ' * spaces
3965-
half = max_rows // 2
3966-
rows = rows[:half] + [centered + '...' + centered] + rows[-half:]
3967-
return "\n".join(rows)
3963+
def _format_data(self):
3964+
# we are formatting thru the attributes
3965+
return None
39683966

39693967
def __len__(self):
39703968
return len(self.labels[0])

pandas/tests/test_format.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -3215,13 +3215,13 @@ def test_date_explict_date_format(self):
32153215
class TestDatetimeIndexUnicode(tm.TestCase):
32163216
def test_dates(self):
32173217
text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1)]))
3218-
self.assertTrue("[2013-01-01," in text)
3219-
self.assertTrue(", 2014-01-01]" in text)
3218+
self.assertTrue("['2013-01-01'," in text)
3219+
self.assertTrue(", '2014-01-01']" in text)
32203220

32213221
def test_mixed(self):
32223222
text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1,12), datetime(2014,1,1)]))
3223-
self.assertTrue("[2013-01-01 00:00:00," in text)
3224-
self.assertTrue(", 2014-01-01 00:00:00]" in text)
3223+
self.assertTrue("['2013-01-01 00:00:00'," in text)
3224+
self.assertTrue(", '2014-01-01 00:00:00']" in text)
32253225

32263226

32273227
class TestStringRepTimestamp(tm.TestCase):

pandas/tests/test_index.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -2480,16 +2480,13 @@ def test_slice_keep_name(self):
24802480

24812481
class DatetimeLike(Base):
24822482

2483-
def test_repr_roundtrip(self):
2484-
raise nose.SkipTest("Short reprs are not supported repr for Datetimelike indexes")
2485-
24862483
def test_str(self):
24872484

24882485
# test the string repr
24892486
idx = self.create_index()
24902487
idx.name = 'foo'
2491-
self.assertTrue("length=%s" % len(idx) in str(idx))
2492-
self.assertTrue("u'foo'" in str(idx))
2488+
self.assertFalse("length=%s" % len(idx) in str(idx))
2489+
self.assertTrue("'foo'" in str(idx))
24932490
self.assertTrue(idx.__class__.__name__ in str(idx))
24942491

24952492
if hasattr(idx,'tz'):

pandas/tseries/base.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -255,17 +255,13 @@ def argmax(self, axis=None):
255255

256256
@property
257257
def _formatter_func(self):
258-
"""
259-
Format function to convert value to representation
260-
"""
261-
return str
258+
raise AbstractMethodError(self)
262259

263260
def _format_attrs(self):
264261
"""
265262
Return a list of tuples of the (attr,formatted_value)
266263
"""
267264
attrs = super(DatetimeIndexOpsMixin, self)._format_attrs()
268-
attrs.append(('length',len(self)))
269265
for attrib in self._attributes:
270266
if attrib == 'freq':
271267
freq = self.freqstr
@@ -499,4 +495,6 @@ def summary(self, name=None):
499495
if self.freq:
500496
result += '\nFreq: %s' % self.freqstr
501497

498+
# display as values, not quoted
499+
result = result.replace("'","")
502500
return result

pandas/tseries/period.py

+4
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,10 @@ def _to_embed(self, keep_tz=False):
293293
""" return an array repr of this object, potentially casting to object """
294294
return self.asobject.values
295295

296+
@property
297+
def _formatter_func(self):
298+
return lambda x: "'%s'" % x
299+
296300
def asof_locs(self, where, mask):
297301
"""
298302
where : array of timestamps

0 commit comments

Comments
 (0)