Skip to content

Commit 3e4f8f0

Browse files
committed
formatting MultiIndex
1 parent 8b83005 commit 3e4f8f0

File tree

8 files changed

+131
-155
lines changed

8 files changed

+131
-155
lines changed

doc/source/whatsnew/v0.16.1.txt

+45
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,51 @@ API changes
159159

160160
- By default, ``read_csv`` and ``read_table`` will now try to infer the compression type based on the file extension. Set ``compression=None`` to restore the previous behavior (no decompression). (:issue:`9770`)
161161

162+
.. _whatsnew_0161.index_repr:
163+
164+
Index Representation
165+
~~~~~~~~~~~~~~~~~~~~
166+
167+
The string representations of ``Index`` and its sub-classes have now been unified. These are all uniform in their output
168+
formats, except for ``MultiIndex``, which has a multi-line repr. The display width responds to the option ``display.max_seq_items``,
169+
which now defaults to 10 (previously 100). (:issue:`6482`)
170+
171+
Previous Behavior
172+
173+
.. code-block:: python
174+
175+
176+
In [1]: pd.get_option('max_seq_items')
177+
Out[1]: 100
178+
179+
In [2]: pd.Index(range(4),name='foo')
180+
Out[2]: Int64Index([0, 1, 2, 3], dtype='int64')
181+
182+
In [3]: pd.Index(range(104),name='foo')
183+
Out[3]: Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, ...], dtype='int64')
184+
185+
In [4]: pd.date_range('20130101',periods=4,name='foo',tz='US/Eastern')
186+
Out[4]:
187+
<class 'pandas.tseries.index.DatetimeIndex'>
188+
[2013-01-01 00:00:00-05:00, ..., 2013-01-04 00:00:00-05:00]
189+
Length: 4, Freq: D, Timezone: US/Eastern
190+
191+
In [5]: pd.date_range('20130101',periods=104,name='foo',tz='US/Eastern')
192+
Out[5]:
193+
<class 'pandas.tseries.index.DatetimeIndex'>
194+
[2013-01-01 00:00:00-05:00, ..., 2013-04-14 00:00:00-04:00]
195+
Length: 104, Freq: D, Timezone: US/Eastern
196+
197+
New Behavior
198+
199+
.. ipython:: python
200+
201+
pd.get_option('max_seq_items')
202+
pd.Index(range(4),name='foo')
203+
pd.Index(range(104),name='foo')
204+
pd.date_range('20130101',periods=4,name='foo',tz='US/Eastern')
205+
pd.date_range('20130101',periods=104,name='foo',tz='US/Eastern')
206+
162207
.. _whatsnew_0161.performance:
163208

164209
Performance Improvements

pandas/core/config_init.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ def mpl_style_cb(key):
269269
cf.register_option('show_dimensions', 'truncate', pc_show_dimensions_doc,
270270
validator=is_one_of_factory([True, False, 'truncate']))
271271
cf.register_option('chop_threshold', None, pc_chop_threshold_doc)
272-
cf.register_option('max_seq_items', 100, pc_max_seq_items)
272+
cf.register_option('max_seq_items', 10, pc_max_seq_items)
273273
cf.register_option('mpl_style', None, pc_mpl_style_doc,
274274
validator=is_one_of_factory([None, False, 'default']),
275275
cb=mpl_style_cb)

pandas/core/index.py

+35-37
Original file line numberDiff line numberDiff line change
@@ -395,21 +395,33 @@ def __unicode__(self):
395395
klass = self.__class__.__name__
396396
data = self._format_data()
397397
attrs = self._format_attrs()
398-
max_seq_items = get_option('display.max_seq_items')
399-
if len(self) > max_seq_items:
400-
space = "\n%s" % (' ' * (len(klass) + 1))
401-
else:
402-
space = " "
398+
space = self._format_space()
403399

404400
prepr = (u(",%s") % space).join([u("%s=%s") % (k, v)
405401
for k, v in attrs])
406-
res = u("%s(%s,%s%s)") % (klass,
407-
data,
408-
space,
409-
prepr)
402+
403+
# no data provided, just attributes
404+
if data is None:
405+
data = ''
406+
else:
407+
data = "%s,%s" % (data, space)
408+
409+
res = u("%s(%s%s)") % (klass,
410+
data,
411+
prepr)
410412

411413
return res
412414

415+
def _format_space(self):
416+
417+
# using space here controls if the attributes
418+
# are line separated or not (the default)
419+
420+
#max_seq_items = get_option('display.max_seq_items')
421+
#if len(self) > max_seq_items:
422+
# space = "\n%s" % (' ' * (len(klass) + 1))
423+
return " "
424+
413425
@property
414426
def _formatter_func(self):
415427
"""
@@ -421,7 +433,6 @@ def _format_data(self):
421433
"""
422434
Return the formatted data as a unicode string
423435
"""
424-
425436
max_seq_items = get_option('display.max_seq_items')
426437
formatter = self._formatter_func
427438
n = len(self)
@@ -450,9 +461,12 @@ def _format_attrs(self):
450461
Return a list of tuples of the (attr,formatted_value)
451462
"""
452463
attrs = []
464+
attrs.append(('dtype',"'%s'" % self.dtype))
453465
if self.name is not None:
454466
attrs.append(('name',default_pprint(self.name)))
455-
attrs.append(('dtype',"'%s'" % self.dtype))
467+
max_seq_items = get_option('display.max_seq_items')
468+
if len(self) > max_seq_items:
469+
attrs.append(('length',len(self)))
456470
return attrs
457471

458472
def to_series(self, **kwargs):
@@ -3914,40 +3928,24 @@ def nbytes(self):
39143928
names_nbytes = sum(( getsizeof(i) for i in self.names ))
39153929
return level_nbytes + label_nbytes + names_nbytes
39163930

3917-
def __repr__(self):
3918-
encoding = get_option('display.encoding')
3931+
def _format_attrs(self):
3932+
"""
3933+
Return a list of tuples of the (attr,formatted_value)
3934+
"""
39193935
attrs = [('levels', default_pprint(self.levels)),
39203936
('labels', default_pprint(self.labels))]
39213937
if not all(name is None for name in self.names):
39223938
attrs.append(('names', default_pprint(self.names)))
39233939
if self.sortorder is not None:
39243940
attrs.append(('sortorder', default_pprint(self.sortorder)))
3941+
return attrs
39253942

3926-
space = ' ' * (len(self.__class__.__name__) + 1)
3927-
prepr = (u(",\n%s") % space).join([u("%s=%s") % (k, v)
3928-
for k, v in attrs])
3929-
res = u("%s(%s)") % (self.__class__.__name__, prepr)
3930-
3931-
if not compat.PY3:
3932-
# needs to be str in Python 2
3933-
res = res.encode(encoding)
3934-
return res
3935-
3936-
def __unicode__(self):
3937-
"""
3938-
Return a string representation for a particular Index
3943+
def _format_space(self):
3944+
return "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
39393945

3940-
Invoked by unicode(df) in py2 only. Yields a Unicode String in both
3941-
py2/py3.
3942-
"""
3943-
rows = self.format(names=True)
3944-
max_rows = get_option('display.max_rows')
3945-
if len(rows) > max_rows:
3946-
spaces = (len(rows[0]) - 3) // 2
3947-
centered = ' ' * spaces
3948-
half = max_rows // 2
3949-
rows = rows[:half] + [centered + '...' + centered] + rows[-half:]
3950-
return "\n".join(rows)
3946+
def _format_data(self):
3947+
# we are formatting thru the attributes
3948+
return None
39513949

39523950
def __len__(self):
39533951
return len(self.labels[0])

pandas/tests/test_format.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -3200,13 +3200,13 @@ def test_date_explict_date_format(self):
32003200
class TestDatetimeIndexUnicode(tm.TestCase):
32013201
def test_dates(self):
32023202
text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1)]))
3203-
self.assertTrue("[2013-01-01," in text)
3204-
self.assertTrue(", 2014-01-01]" in text)
3203+
self.assertTrue("['2013-01-01'," in text)
3204+
self.assertTrue(", '2014-01-01']" in text)
32053205

32063206
def test_mixed(self):
32073207
text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1,12), datetime(2014,1,1)]))
3208-
self.assertTrue("[2013-01-01 00:00:00," in text)
3209-
self.assertTrue(", 2014-01-01 00:00:00]" in text)
3208+
self.assertTrue("['2013-01-01 00:00:00'," in text)
3209+
self.assertTrue(", '2014-01-01 00:00:00']" in text)
32103210

32113211

32123212
class TestStringRepTimestamp(tm.TestCase):

pandas/tests/test_index.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -2454,16 +2454,13 @@ def test_slice_keep_name(self):
24542454

24552455
class DatetimeLike(Base):
24562456

2457-
def test_repr_roundtrip(self):
2458-
raise nose.SkipTest("Short reprs are not supported repr for Datetimelike indexes")
2459-
24602457
def test_str(self):
24612458

24622459
# test the string repr
24632460
idx = self.create_index()
24642461
idx.name = 'foo'
2465-
self.assertTrue("length=%s" % len(idx) in str(idx))
2466-
self.assertTrue("u'foo'" in str(idx))
2462+
self.assertFalse("length=%s" % len(idx) in str(idx))
2463+
self.assertTrue("'foo'" in str(idx))
24672464
self.assertTrue(idx.__class__.__name__ in str(idx))
24682465

24692466
if hasattr(idx,'tz'):

pandas/tseries/base.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -255,17 +255,13 @@ def argmax(self, axis=None):
255255

256256
@property
257257
def _formatter_func(self):
258-
"""
259-
Format function to convert value to representation
260-
"""
261-
return str
258+
raise AbstractMethodError(self)
262259

263260
def _format_attrs(self):
264261
"""
265262
Return a list of tuples of the (attr,formatted_value)
266263
"""
267264
attrs = super(DatetimeIndexOpsMixin, self)._format_attrs()
268-
attrs.append(('length',len(self)))
269265
for attrib in self._attributes:
270266
if attrib == 'freq':
271267
freq = self.freqstr
@@ -499,4 +495,6 @@ def summary(self, name=None):
499495
if self.freq:
500496
result += '\nFreq: %s' % self.freqstr
501497

498+
# display as values, not quoted
499+
result = result.replace("'","")
502500
return result

pandas/tseries/period.py

+4
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,10 @@ def _to_embed(self, keep_tz=False):
293293
""" return an array repr of this object, potentially casting to object """
294294
return self.asobject.values
295295

296+
@property
297+
def _formatter_func(self):
298+
return lambda x: "'%s'" % x
299+
296300
def asof_locs(self, where, mask):
297301
"""
298302
where : array of timestamps

0 commit comments

Comments
 (0)