Skip to content

Commit c6a5a4a

Browse files
committed
fixup for CategoricalIndex merge
1 parent d994bb7 commit c6a5a4a

File tree

4 files changed

+55
-38
lines changed

4 files changed

+55
-38
lines changed

doc/source/whatsnew/v0.16.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ Index Representation
213213
~~~~~~~~~~~~~~~~~~~~
214214

215215
The string representation of ``Index`` and its sub-classes has now been unified. These are all uniform in their output
216-
formats, except for ``MultiIndex``, which has a multi-line repr. The display width responds to the option ``display.max_seq_len``,
216+
formats (a single line), except for ``MultiIndex`` and ``CategoricalIndex``, which have a multi-line repr. The display width responds to the option ``display.max_seq_items``,
217217
which is now defaulted to 10 (previously was 100). (:issue:`6482`)
218218

219219
Previous Behavior

pandas/core/common.py

+18-10
Original file line numberDiff line numberDiff line change
@@ -3132,7 +3132,7 @@ def in_ipython_frontend():
31323132
# working with straight ascii.
31333133

31343134

3135-
def _pprint_seq(seq, _nest_lvl=0, **kwds):
3135+
def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds):
31363136
"""
31373137
internal. pprinter for iterables. you should probably use pprint_thing()
31383138
rather than calling this directly.
@@ -3144,12 +3144,15 @@ def _pprint_seq(seq, _nest_lvl=0, **kwds):
31443144
else:
31453145
fmt = u("[%s]") if hasattr(seq, '__setitem__') else u("(%s)")
31463146

3147-
nitems = get_option("max_seq_items") or len(seq)
3147+
if max_seq_items is False:
3148+
nitems = len(seq)
3149+
else:
3150+
nitems = max_seq_items or get_option("max_seq_items") or len(seq)
31483151

31493152
s = iter(seq)
31503153
r = []
31513154
for i in range(min(nitems, len(seq))): # handle sets, no slicing
3152-
r.append(pprint_thing(next(s), _nest_lvl + 1, **kwds))
3155+
r.append(pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds))
31533156
body = ", ".join(r)
31543157

31553158
if nitems < len(seq):
@@ -3160,7 +3163,7 @@ def _pprint_seq(seq, _nest_lvl=0, **kwds):
31603163
return fmt % body
31613164

31623165

3163-
def _pprint_dict(seq, _nest_lvl=0, **kwds):
3166+
def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds):
31643167
"""
31653168
internal. pprinter for iterables. you should probably use pprint_thing()
31663169
rather than calling this directly.
@@ -3170,11 +3173,14 @@ def _pprint_dict(seq, _nest_lvl=0, **kwds):
31703173

31713174
pfmt = u("%s: %s")
31723175

3173-
nitems = get_option("max_seq_items") or len(seq)
3176+
if max_seq_items is False:
3177+
nitems = len(seq)
3178+
else:
3179+
nitems = max_seq_items or get_option("max_seq_items") or len(seq)
31743180

31753181
for k, v in list(seq.items())[:nitems]:
3176-
pairs.append(pfmt % (pprint_thing(k, _nest_lvl + 1, **kwds),
3177-
pprint_thing(v, _nest_lvl + 1, **kwds)))
3182+
pairs.append(pfmt % (pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
3183+
pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)))
31783184

31793185
if nitems < len(seq):
31803186
return fmt % (", ".join(pairs) + ", ...")
@@ -3183,7 +3189,7 @@ def _pprint_dict(seq, _nest_lvl=0, **kwds):
31833189

31843190

31853191
def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False,
3186-
quote_strings=False):
3192+
quote_strings=False, max_seq_items=None):
31873193
"""
31883194
This function is the sanctioned way of converting objects
31893195
to a unicode representation.
@@ -3202,6 +3208,8 @@ def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False,
32023208
replacements
32033209
default_escapes : bool, default False
32043210
Whether the input escape characters replaces or adds to the defaults
3211+
max_seq_items : False, int, default None
3212+
Passed through to the sequence/dict pretty printers to cap the number of items printed; ``False`` prints all items
32053213
32063214
Returns
32073215
-------
@@ -3240,11 +3248,11 @@ def as_escaped_unicode(thing, escape_chars=escape_chars):
32403248
return compat.text_type(thing)
32413249
elif (isinstance(thing, dict) and
32423250
_nest_lvl < get_option("display.pprint_nest_depth")):
3243-
result = _pprint_dict(thing, _nest_lvl, quote_strings=True)
3251+
result = _pprint_dict(thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items)
32443252
elif is_sequence(thing) and _nest_lvl < \
32453253
get_option("display.pprint_nest_depth"):
32463254
result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars,
3247-
quote_strings=quote_strings)
3255+
quote_strings=quote_strings, max_seq_items=max_seq_items)
32483256
elif isinstance(thing, compat.string_types) and quote_strings:
32493257
if compat.PY3:
32503258
fmt = "'%s'"

pandas/core/index.py

+18-26
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,10 @@
2626
from pandas.io.common import PerformanceWarning
2727

2828
# simplify
29-
default_pprint = lambda x: com.pprint_thing(x, escape_chars=('\t', '\r', '\n'),
30-
quote_strings=True)
29+
default_pprint = lambda x, max_seq_items=None: com.pprint_thing(x,
30+
escape_chars=('\t', '\r', '\n'),
31+
quote_strings=True,
32+
max_seq_items=max_seq_items)
3133

3234

3335
__all__ = ['Index']
@@ -2868,32 +2870,22 @@ def equals(self, other):
28682870

28692871
return False
28702872

2871-
def __unicode__(self):
2873+
def _format_attrs(self):
28722874
"""
2873-
Return a string representation for this object.
2874-
2875-
Invoked by unicode(df) in py2 only. Yields a Unicode String in both
2876-
py2/py3.
2875+
Return a list of tuples of the (attr,formatted_value)
28772876
"""
2877+
attrs = [('categories', default_pprint(self.categories, max_seq_items=False)),
2878+
('ordered',self.ordered)]
2879+
if self.name is not None:
2880+
attrs.append(('name',default_pprint(self.name)))
2881+
attrs.append(('dtype',"'%s'" % self.dtype))
2882+
return attrs
28782883

2879-
# currently doesn't use the display.max_categories, or display.max_seq_len
2880-
# for head/tail printing
2881-
values = default_pprint(self.values.get_values())
2882-
cats = default_pprint(self.categories.get_values())
2883-
space = ' ' * (len(self.__class__.__name__) + 1)
2884-
name = self.name
2885-
if name is not None:
2886-
name = default_pprint(name)
2887-
2888-
result = u("{klass}({values},\n{space}categories={categories},\n{space}ordered={ordered},\n{space}name={name})").format(
2889-
klass=self.__class__.__name__,
2890-
values=values,
2891-
categories=cats,
2892-
ordered=self.ordered,
2893-
name=name,
2894-
space=space)
2884+
def _format_space(self):
2885+
return "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
28952886

2896-
return result
2887+
def _format_data(self):
2888+
return default_pprint(self.values.get_values(), max_seq_items=False)
28972889

28982890
@property
28992891
def inferred_type(self):
@@ -3949,8 +3941,8 @@ def _format_attrs(self):
39493941
"""
39503942
Return a list of tuples of the (attr,formatted_value)
39513943
"""
3952-
attrs = [('levels', default_pprint(self.levels)),
3953-
('labels', default_pprint(self.labels))]
3944+
attrs = [('levels', default_pprint(self._levels, max_seq_items=False)),
3945+
('labels', default_pprint(self._labels, max_seq_items=False))]
39543946
if not all(name is None for name in self.names):
39553947
attrs.append(('names', default_pprint(self.names)))
39563948
if self.sortorder is not None:

pandas/tests/test_index.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -1704,7 +1704,7 @@ def test_get_indexer(self):
17041704
self.assertRaises(NotImplementedError, lambda : idx2.get_indexer(idx1, method='backfill'))
17051705
self.assertRaises(NotImplementedError, lambda : idx2.get_indexer(idx1, method='nearest'))
17061706

1707-
def test_repr(self):
1707+
def test_repr_roundtrip(self):
17081708

17091709
ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
17101710
str(ci)
@@ -4410,6 +4410,23 @@ def test_repr_with_unicode_data(self):
44104410
index = pd.DataFrame(d).set_index(["a", "b"]).index
44114411
self.assertFalse("\\u" in repr(index)) # we don't want unicode-escaped
44124412

4413+
def test_repr_roundtrip(self):
4414+
4415+
mi = MultiIndex.from_product([list('ab'),range(3)],names=['first','second'])
4416+
str(mi)
4417+
tm.assert_index_equal(eval(repr(mi)),mi,exact=True)
4418+
4419+
# formatting
4420+
if compat.PY3:
4421+
str(mi)
4422+
else:
4423+
compat.text_type(mi)
4424+
4425+
# long format
4426+
mi = MultiIndex.from_product([list('abcdefg'),range(10)],names=['first','second'])
4427+
result = str(mi)
4428+
tm.assert_index_equal(eval(repr(mi)),mi,exact=True)
4429+
44134430
def test_str(self):
44144431
# tested elsewhere
44154432
pass

0 commit comments

Comments
 (0)