Skip to content

Commit c46bd61

Browse files
committed
fixup for CategoricalIndex merge
increase limits for max_seq_items & printing for Index; add extended repr for datetimelike indexes; fix tseries/test_base for repr; adjust docs for repr-name; use new format_data on all Index types
1 parent d1f0f01 commit c46bd61

File tree

8 files changed

+133
-67
lines changed

8 files changed

+133
-67
lines changed

doc/source/advanced.rst

+1-4
Original file line numberDiff line numberDiff line change
@@ -675,10 +675,7 @@ values NOT in the categories, similarly to how you can reindex ANY pandas index.
675675
}).set_index('B')
676676
677677
In [11]: df3.index
678-
Out[11]:
679-
CategoricalIndex([u'a', u'a', u'b', u'b', u'c', u'a'],
680-
categories=[u'a', u'b', u'c'],
681-
ordered=False)
678+
Out[11]: CategoricalIndex([u'a', u'a', u'b', u'b', u'c', u'a'], categories=[u'a', u'b', u'c'], ordered=False, name=u'B', dtype='category')
682679
683680
In [12]: pd.concat([df2,df3]
684681
TypeError: categories must match existing categories when appending

doc/source/whatsnew/v0.16.1.txt

+2-4
Original file line numberDiff line numberDiff line change
@@ -254,15 +254,13 @@ API changes
254254
Index Representation
255255
~~~~~~~~~~~~~~~~~~~~
256256

257-
The string representation of ``Index`` and its sub-classes have now been unified. These are all uniform in their output
258-
formats, except for ``MultiIndex``, which has a multi-line repr. The display width responds to the option ``display.max_seq_len``,
259-
which is now defaulted to 10 (previously was 100). (:issue:`6482`)
257+
The string representation of ``Index`` and its sub-classes has now been unified. ``Index``, ``Int64Index``, ``Float64Index`` and ``CategoricalIndex`` use a single-line display. The datetimelike indexes (``DatetimeIndex``, ``PeriodIndex``, ``TimedeltaIndex``) and ``MultiIndex`` display in a multi-line format that shows many more of the index values. The display width responds to the option ``display.max_seq_items``,
258+
which now defaults to 20 (previously 100). (:issue:`6482`)
260259

261260
Previous Behavior
262261

263262
.. code-block:: python
264263

265-
266264
In [1]: pd.get_option('max_seq_items')
267265
Out[1]: 100
268266

pandas/core/common.py

+18-10
Original file line numberDiff line numberDiff line change
@@ -3132,7 +3132,7 @@ def in_ipython_frontend():
31323132
# working with straight ascii.
31333133

31343134

3135-
def _pprint_seq(seq, _nest_lvl=0, **kwds):
3135+
def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds):
31363136
"""
31373137
internal. pprinter for iterables. you should probably use pprint_thing()
31383138
rather then calling this directly.
@@ -3144,12 +3144,15 @@ def _pprint_seq(seq, _nest_lvl=0, **kwds):
31443144
else:
31453145
fmt = u("[%s]") if hasattr(seq, '__setitem__') else u("(%s)")
31463146

3147-
nitems = get_option("max_seq_items") or len(seq)
3147+
if max_seq_items is False:
3148+
nitems = len(seq)
3149+
else:
3150+
nitems = max_seq_items or get_option("max_seq_items") or len(seq)
31483151

31493152
s = iter(seq)
31503153
r = []
31513154
for i in range(min(nitems, len(seq))): # handle sets, no slicing
3152-
r.append(pprint_thing(next(s), _nest_lvl + 1, **kwds))
3155+
r.append(pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds))
31533156
body = ", ".join(r)
31543157

31553158
if nitems < len(seq):
@@ -3160,7 +3163,7 @@ def _pprint_seq(seq, _nest_lvl=0, **kwds):
31603163
return fmt % body
31613164

31623165

3163-
def _pprint_dict(seq, _nest_lvl=0, **kwds):
3166+
def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds):
31643167
"""
31653168
internal. pprinter for iterables. you should probably use pprint_thing()
31663169
rather then calling this directly.
@@ -3170,11 +3173,14 @@ def _pprint_dict(seq, _nest_lvl=0, **kwds):
31703173

31713174
pfmt = u("%s: %s")
31723175

3173-
nitems = get_option("max_seq_items") or len(seq)
3176+
if max_seq_items is False:
3177+
nitems = len(seq)
3178+
else:
3179+
nitems = max_seq_items or get_option("max_seq_items") or len(seq)
31743180

31753181
for k, v in list(seq.items())[:nitems]:
3176-
pairs.append(pfmt % (pprint_thing(k, _nest_lvl + 1, **kwds),
3177-
pprint_thing(v, _nest_lvl + 1, **kwds)))
3182+
pairs.append(pfmt % (pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
3183+
pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)))
31783184

31793185
if nitems < len(seq):
31803186
return fmt % (", ".join(pairs) + ", ...")
@@ -3183,7 +3189,7 @@ def _pprint_dict(seq, _nest_lvl=0, **kwds):
31833189

31843190

31853191
def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False,
3186-
quote_strings=False):
3192+
quote_strings=False, max_seq_items=None):
31873193
"""
31883194
This function is the sanctioned way of converting objects
31893195
to a unicode representation.
@@ -3202,6 +3208,8 @@ def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False,
32023208
replacements
32033209
default_escapes : bool, default False
32043210
Whether the input escape characters replaces or adds to the defaults
3211+
max_seq_items : False, int, default None
3212+
Pass thru to other pretty printers to limit sequence printing
32053213
32063214
Returns
32073215
-------
@@ -3240,11 +3248,11 @@ def as_escaped_unicode(thing, escape_chars=escape_chars):
32403248
return compat.text_type(thing)
32413249
elif (isinstance(thing, dict) and
32423250
_nest_lvl < get_option("display.pprint_nest_depth")):
3243-
result = _pprint_dict(thing, _nest_lvl, quote_strings=True)
3251+
result = _pprint_dict(thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items)
32443252
elif is_sequence(thing) and _nest_lvl < \
32453253
get_option("display.pprint_nest_depth"):
32463254
result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars,
3247-
quote_strings=quote_strings)
3255+
quote_strings=quote_strings, max_seq_items=max_seq_items)
32483256
elif isinstance(thing, compat.string_types) and quote_strings:
32493257
if compat.PY3:
32503258
fmt = "'%s'"

pandas/core/config_init.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ def mpl_style_cb(key):
269269
cf.register_option('show_dimensions', 'truncate', pc_show_dimensions_doc,
270270
validator=is_one_of_factory([True, False, 'truncate']))
271271
cf.register_option('chop_threshold', None, pc_chop_threshold_doc)
272-
cf.register_option('max_seq_items', 10, pc_max_seq_items)
272+
cf.register_option('max_seq_items', 20, pc_max_seq_items)
273273
cf.register_option('mpl_style', None, pc_mpl_style_doc,
274274
validator=is_one_of_factory([None, False, 'default']),
275275
cb=mpl_style_cb)

pandas/core/index.py

+49-29
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,10 @@
2626
from pandas.io.common import PerformanceWarning
2727

2828
# simplify
29-
default_pprint = lambda x: com.pprint_thing(x, escape_chars=('\t', '\r', '\n'),
30-
quote_strings=True)
29+
default_pprint = lambda x, max_seq_items=None: com.pprint_thing(x,
30+
escape_chars=('\t', '\r', '\n'),
31+
quote_strings=True,
32+
max_seq_items=max_seq_items)
3133

3234

3335
__all__ = ['Index']
@@ -430,6 +432,37 @@ def _formatter_func(self):
430432
return default_pprint
431433

432434
def _format_data(self):
435+
"""
436+
Return the formatted data as a unicode string
437+
"""
438+
space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2))
439+
space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
440+
sep = ',%s' % space1
441+
max_seq_items = get_option('display.max_seq_items')
442+
formatter = self._formatter_func
443+
n = len(self)
444+
if n == 0:
445+
summary = '[]'
446+
elif n == 1:
447+
first = formatter(self[0])
448+
summary = '[%s]' % first
449+
elif n == 2:
450+
first = formatter(self[0])
451+
last = formatter(self[-1])
452+
summary = '[%s%s%s]' % (first, sep, last)
453+
elif n > max_seq_items:
454+
n = min(max_seq_items//2,10)
455+
456+
head = sep.join([ formatter(x) for x in self[:n] ])
457+
tail = sep.join([ formatter(x) for x in self[-n:] ])
458+
summary = '[%s%s...%s%s]' % (head, space1, space1, tail)
459+
else:
460+
values = sep.join([ formatter(x) for x in self ])
461+
summary = '[%s]' % (values)
462+
463+
return summary
464+
465+
def _format_data2(self):
433466
"""
434467
Return the formatted data as a unicode string
435468
"""
@@ -446,7 +479,7 @@ def _format_data(self):
446479
last = formatter(self[-1])
447480
summary = '[%s, %s]' % (first, last)
448481
elif n > max_seq_items:
449-
n = min(max_seq_items//2,2)
482+
n = min(max_seq_items//2,5)
450483
head = ', '.join([ formatter(x) for x in self[:n] ])
451484
tail = ', '.join([ formatter(x) for x in self[-n:] ])
452485
summary = '[%s, ..., %s]' % (head, tail)
@@ -2874,32 +2907,19 @@ def equals(self, other):
28742907

28752908
return False
28762909

2877-
def __unicode__(self):
2910+
def _format_attrs(self):
28782911
"""
2879-
Return a string representation for this object.
2880-
2881-
Invoked by unicode(df) in py2 only. Yields a Unicode String in both
2882-
py2/py3.
2912+
Return a list of tuples of the (attr,formatted_value)
28832913
"""
2884-
2885-
# currently doesn't use the display.max_categories, or display.max_seq_len
2886-
# for head/tail printing
2887-
values = default_pprint(self.values.get_values())
2888-
cats = default_pprint(self.categories.get_values())
2889-
space = ' ' * (len(self.__class__.__name__) + 1)
2890-
name = self.name
2891-
if name is not None:
2892-
name = default_pprint(name)
2893-
2894-
result = u("{klass}({values},\n{space}categories={categories},\n{space}ordered={ordered},\n{space}name={name})").format(
2895-
klass=self.__class__.__name__,
2896-
values=values,
2897-
categories=cats,
2898-
ordered=self.ordered,
2899-
name=name,
2900-
space=space)
2901-
2902-
return result
2914+
attrs = [('categories', default_pprint(self.categories)),
2915+
('ordered',self.ordered)]
2916+
if self.name is not None:
2917+
attrs.append(('name',default_pprint(self.name)))
2918+
attrs.append(('dtype',"'%s'" % self.dtype))
2919+
max_seq_items = get_option('display.max_seq_items')
2920+
if len(self) > max_seq_items:
2921+
attrs.append(('length',len(self)))
2922+
return attrs
29032923

29042924
@property
29052925
def inferred_type(self):
@@ -3955,8 +3975,8 @@ def _format_attrs(self):
39553975
"""
39563976
Return a list of tuples of the (attr,formatted_value)
39573977
"""
3958-
attrs = [('levels', default_pprint(self.levels)),
3959-
('labels', default_pprint(self.labels))]
3978+
attrs = [('levels', default_pprint(self._levels, max_seq_items=False)),
3979+
('labels', default_pprint(self._labels, max_seq_items=False))]
39603980
if not all(name is None for name in self.names):
39613981
attrs.append(('names', default_pprint(self.names)))
39623982
if self.sortorder is not None:

pandas/tests/test_index.py

+23-3
Original file line numberDiff line numberDiff line change
@@ -1711,7 +1711,7 @@ def test_get_indexer(self):
17111711
self.assertRaises(NotImplementedError, lambda : idx2.get_indexer(idx1, method='backfill'))
17121712
self.assertRaises(NotImplementedError, lambda : idx2.get_indexer(idx1, method='nearest'))
17131713

1714-
def test_repr(self):
1714+
def test_repr_roundtrip(self):
17151715

17161716
ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
17171717
str(ci)
@@ -1724,9 +1724,12 @@ def test_repr(self):
17241724
compat.text_type(ci)
17251725

17261726
# long format
1727+
# this is not reprable
17271728
ci = CategoricalIndex(np.random.randint(0,5,size=100))
1728-
result = str(ci)
1729-
tm.assert_index_equal(eval(repr(ci)),ci,exact=True)
1729+
if compat.PY3:
1730+
str(ci)
1731+
else:
1732+
compat.text_type(ci)
17301733

17311734
def test_isin(self):
17321735

@@ -4417,6 +4420,23 @@ def test_repr_with_unicode_data(self):
44174420
index = pd.DataFrame(d).set_index(["a", "b"]).index
44184421
self.assertFalse("\\u" in repr(index)) # we don't want unicode-escaped
44194422

4423+
def test_repr_roundtrip(self):
4424+
4425+
mi = MultiIndex.from_product([list('ab'),range(3)],names=['first','second'])
4426+
str(mi)
4427+
tm.assert_index_equal(eval(repr(mi)),mi,exact=True)
4428+
4429+
# formatting
4430+
if compat.PY3:
4431+
str(mi)
4432+
else:
4433+
compat.text_type(mi)
4434+
4435+
# long format
4436+
mi = MultiIndex.from_product([list('abcdefg'),range(10)],names=['first','second'])
4437+
result = str(mi)
4438+
tm.assert_index_equal(eval(repr(mi)),mi,exact=True)
4439+
44204440
def test_str(self):
44214441
# tested elsewhere
44224442
pass

pandas/tseries/base.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
infer_freq, to_offset, get_period_alias,
1818
Resolution)
1919
import pandas.algos as _algos
20+
from pandas.core.config import get_option
2021

2122
class DatetimeIndexOpsMixin(object):
2223
""" common ops mixin to support a unified inteface datetimelike Index """
@@ -79,9 +80,9 @@ def freqstr(self):
7980

8081
@cache_readonly
8182
def inferred_freq(self):
82-
"""
83-
Trys to return a string representing a frequency guess,
84-
generated by infer_freq. Returns None if it can't autodetect the
83+
"""
84+
Trys to return a string representing a frequency guess,
85+
generated by infer_freq. Returns None if it can't autodetect the
8586
frequency.
8687
"""
8788
try:

pandas/tseries/tests/test_base.py

+35-13
Original file line numberDiff line numberDiff line change
@@ -123,13 +123,20 @@ def test_representation(self):
123123

124124
exp2 = """DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D', tz=None)"""
125125

126-
exp3 = """DatetimeIndex(['2011-01-01', '2011-01-02'], dtype='datetime64[ns]', freq='D', tz=None)"""
126+
exp3 = """DatetimeIndex(['2011-01-01'
127+
'2011-01-02'], dtype='datetime64[ns]', freq='D', tz=None)"""
127128

128-
exp4 = """DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], dtype='datetime64[ns]', freq='D', tz=None)"""
129+
exp4 = """DatetimeIndex(['2011-01-01',
130+
'2011-01-02',
131+
'2011-01-03'], dtype='datetime64[ns]', freq='D', tz=None)"""
129132

130-
exp5 = """DatetimeIndex(['2011-01-01 09:00:00+09:00', '2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00'], dtype='datetime64[ns]', freq='H', tz='Asia/Tokyo')"""
133+
exp5 = """DatetimeIndex(['2011-01-01 09:00:00+09:00',
134+
'2011-01-01 10:00:00+09:00',
135+
'2011-01-01 11:00:00+09:00'], dtype='datetime64[ns]', freq='H', tz='Asia/Tokyo')"""
131136

132-
exp6 = """DatetimeIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', 'NaT'], dtype='datetime64[ns]', freq=None, tz='US/Eastern')"""
137+
exp6 = """DatetimeIndex(['2011-01-01 09:00:00-05:00',
138+
'2011-01-01 10:00:00-05:00',
139+
'NaT'], dtype='datetime64[ns]', freq=None, tz='US/Eastern')"""
133140

134141
for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6],
135142
[exp1, exp2, exp3, exp4, exp5, exp6]):
@@ -370,11 +377,16 @@ def test_representation(self):
370377

371378
exp2 = """TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', freq='D')"""
372379

373-
exp3 = """TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='D')"""
380+
exp3 = """TimedeltaIndex(['1 days'
381+
'2 days'], dtype='timedelta64[ns]', freq='D')"""
374382

375-
exp4 = """TimedeltaIndex(['1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq='D')"""
383+
exp4 = """TimedeltaIndex(['1 days',
384+
'2 days',
385+
'3 days'], dtype='timedelta64[ns]', freq='D')"""
376386

377-
exp5 = """TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', '3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)"""
387+
exp5 = """TimedeltaIndex(['1 days 00:00:01',
388+
'2 days 00:00:00',
389+
'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)"""
378390

379391
for idx, expected in zip([idx1, idx2, idx3, idx4, idx5],
380392
[exp1, exp2, exp3, exp4, exp5]):
@@ -834,19 +846,29 @@ def test_representation(self):
834846

835847
exp2 = """PeriodIndex(['2011-01-01'], dtype='int64', freq='D')"""
836848

837-
exp3 = """PeriodIndex(['2011-01-01', '2011-01-02'], dtype='int64', freq='D')"""
849+
exp3 = """PeriodIndex(['2011-01-01'
850+
'2011-01-02'], dtype='int64', freq='D')"""
838851

839-
exp4 = """PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], dtype='int64', freq='D')"""
852+
exp4 = """PeriodIndex(['2011-01-01',
853+
'2011-01-02',
854+
'2011-01-03'], dtype='int64', freq='D')"""
840855

841-
exp5 = """PeriodIndex(['2011', '2012', '2013'], dtype='int64', freq='A-DEC')"""
856+
exp5 = """PeriodIndex(['2011',
857+
'2012',
858+
'2013'], dtype='int64', freq='A-DEC')"""
842859

843-
exp6 = """PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], dtype='int64', freq='H')"""
860+
exp6 = """PeriodIndex(['2011-01-01 09:00',
861+
'2012-02-01 10:00',
862+
'NaT'], dtype='int64', freq='H')"""
844863

845864
exp7 = """PeriodIndex(['2013Q1'], dtype='int64', freq='Q-DEC')"""
846865

847-
exp8 = """PeriodIndex(['2013Q1', '2013Q2'], dtype='int64', freq='Q-DEC')"""
866+
exp8 = """PeriodIndex(['2013Q1'
867+
'2013Q2'], dtype='int64', freq='Q-DEC')"""
848868

849-
exp9 = """PeriodIndex(['2013Q1', '2013Q2', '2013Q3'], dtype='int64', freq='Q-DEC')"""
869+
exp9 = """PeriodIndex(['2013Q1',
870+
'2013Q2',
871+
'2013Q3'], dtype='int64', freq='Q-DEC')"""
850872

851873
for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9],
852874
[exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9]):

0 commit comments

Comments
 (0)