Skip to content

Commit 4b42791

Browse files
committed
fixup for CategoricalIndex merge
1 parent 7e8f7fa commit 4b42791

File tree

4 files changed

+55
-38
lines changed

4 files changed

+55
-38
lines changed

doc/source/whatsnew/v0.16.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ Index Representation
167167
~~~~~~~~~~~~~~~~~~~~
168168

169169
The string representation of ``Index`` and its sub-classes has now been unified. These are all uniform in their output
170-
formats, except for ``MultiIndex``, which has a multi-line repr. The display width responds to the option ``display.max_seq_len``,
170+
formats to a single line, except for ``MultiIndex`` and ``CategoricalIndex``, which have a multi-line repr. The display width responds to the option ``display.max_seq_items``,
171171
which is now defaulted to 10 (previously was 100). (:issue:`6482`)
172172

173173
Previous Behavior

pandas/core/common.py

+18-10
Original file line numberDiff line numberDiff line change
@@ -3132,7 +3132,7 @@ def in_ipython_frontend():
31323132
# working with straight ascii.
31333133

31343134

3135-
def _pprint_seq(seq, _nest_lvl=0, **kwds):
3135+
def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds):
31363136
"""
31373137
internal. pprinter for iterables. you should probably use pprint_thing()
31383138
rather than calling this directly.
@@ -3144,12 +3144,15 @@ def _pprint_seq(seq, _nest_lvl=0, **kwds):
31443144
else:
31453145
fmt = u("[%s]") if hasattr(seq, '__setitem__') else u("(%s)")
31463146

3147-
nitems = get_option("max_seq_items") or len(seq)
3147+
if max_seq_items is False:
3148+
nitems = len(seq)
3149+
else:
3150+
nitems = max_seq_items or get_option("max_seq_items") or len(seq)
31483151

31493152
s = iter(seq)
31503153
r = []
31513154
for i in range(min(nitems, len(seq))): # handle sets, no slicing
3152-
r.append(pprint_thing(next(s), _nest_lvl + 1, **kwds))
3155+
r.append(pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds))
31533156
body = ", ".join(r)
31543157

31553158
if nitems < len(seq):
@@ -3160,7 +3163,7 @@ def _pprint_seq(seq, _nest_lvl=0, **kwds):
31603163
return fmt % body
31613164

31623165

3163-
def _pprint_dict(seq, _nest_lvl=0, **kwds):
3166+
def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds):
31643167
"""
31653168
internal. pprinter for iterables. you should probably use pprint_thing()
31663169
rather than calling this directly.
@@ -3170,11 +3173,14 @@ def _pprint_dict(seq, _nest_lvl=0, **kwds):
31703173

31713174
pfmt = u("%s: %s")
31723175

3173-
nitems = get_option("max_seq_items") or len(seq)
3176+
if max_seq_items is False:
3177+
nitems = len(seq)
3178+
else:
3179+
nitems = max_seq_items or get_option("max_seq_items") or len(seq)
31743180

31753181
for k, v in list(seq.items())[:nitems]:
3176-
pairs.append(pfmt % (pprint_thing(k, _nest_lvl + 1, **kwds),
3177-
pprint_thing(v, _nest_lvl + 1, **kwds)))
3182+
pairs.append(pfmt % (pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
3183+
pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)))
31783184

31793185
if nitems < len(seq):
31803186
return fmt % (", ".join(pairs) + ", ...")
@@ -3183,7 +3189,7 @@ def _pprint_dict(seq, _nest_lvl=0, **kwds):
31833189

31843190

31853191
def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False,
3186-
quote_strings=False):
3192+
quote_strings=False, max_seq_items=None):
31873193
"""
31883194
This function is the sanctioned way of converting objects
31893195
to a unicode representation.
@@ -3202,6 +3208,8 @@ def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False,
32023208
replacements
32033209
default_escapes : bool, default False
32043210
Whether the input escape characters replace or add to the defaults
3211+
max_seq_items : False, int, default None
3212+
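Passed through to the nested pretty printers to limit how many sequence items are printed; False prints every item, None falls back to the ``max_seq_items`` option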
32053213
32063214
Returns
32073215
-------
@@ -3240,11 +3248,11 @@ def as_escaped_unicode(thing, escape_chars=escape_chars):
32403248
return compat.text_type(thing)
32413249
elif (isinstance(thing, dict) and
32423250
_nest_lvl < get_option("display.pprint_nest_depth")):
3243-
result = _pprint_dict(thing, _nest_lvl, quote_strings=True)
3251+
result = _pprint_dict(thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items)
32443252
elif is_sequence(thing) and _nest_lvl < \
32453253
get_option("display.pprint_nest_depth"):
32463254
result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars,
3247-
quote_strings=quote_strings)
3255+
quote_strings=quote_strings, max_seq_items=max_seq_items)
32483256
elif isinstance(thing, compat.string_types) and quote_strings:
32493257
if compat.PY3:
32503258
fmt = "'%s'"

pandas/core/index.py

+18-26
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,10 @@
2626
from pandas.io.common import PerformanceWarning
2727

2828
# simplify
29-
default_pprint = lambda x: com.pprint_thing(x, escape_chars=('\t', '\r', '\n'),
30-
quote_strings=True)
29+
default_pprint = lambda x, max_seq_items=None: com.pprint_thing(x,
30+
escape_chars=('\t', '\r', '\n'),
31+
quote_strings=True,
32+
max_seq_items=max_seq_items)
3133

3234

3335
__all__ = ['Index']
@@ -2852,32 +2854,22 @@ def equals(self, other):
28522854

28532855
return False
28542856

2855-
def __unicode__(self):
2857+
def _format_attrs(self):
28562858
"""
2857-
Return a string representation for this object.
2858-
2859-
Invoked by unicode(df) in py2 only. Yields a Unicode String in both
2860-
py2/py3.
2859+
Return a list of tuples of the (attr,formatted_value)
28612860
"""
2861+
attrs = [('categories', default_pprint(self.categories, max_seq_items=False)),
2862+
('ordered',self.ordered)]
2863+
if self.name is not None:
2864+
attrs.append(('name',default_pprint(self.name)))
2865+
attrs.append(('dtype',"'%s'" % self.dtype))
2866+
return attrs
28622867

2863-
# currently doesn't use the display.max_categories, or display.max_seq_len
2864-
# for head/tail printing
2865-
values = default_pprint(self.values.get_values())
2866-
cats = default_pprint(self.categories.get_values())
2867-
space = ' ' * (len(self.__class__.__name__) + 1)
2868-
name = self.name
2869-
if name is not None:
2870-
name = default_pprint(name)
2871-
2872-
result = u("{klass}({values},\n{space}categories={categories},\n{space}ordered={ordered},\n{space}name={name})").format(
2873-
klass=self.__class__.__name__,
2874-
values=values,
2875-
categories=cats,
2876-
ordered=self.ordered,
2877-
name=name,
2878-
space=space)
2868+
def _format_space(self):
2869+
return "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
28792870

2880-
return result
2871+
def _format_data(self):
2872+
return default_pprint(self.values.get_values(), max_seq_items=False)
28812873

28822874
@property
28832875
def inferred_type(self):
@@ -3932,8 +3924,8 @@ def _format_attrs(self):
39323924
"""
39333925
Return a list of tuples of the (attr,formatted_value)
39343926
"""
3935-
attrs = [('levels', default_pprint(self.levels)),
3936-
('labels', default_pprint(self.labels))]
3927+
attrs = [('levels', default_pprint(self._levels, max_seq_items=False)),
3928+
('labels', default_pprint(self._labels, max_seq_items=False))]
39373929
if not all(name is None for name in self.names):
39383930
attrs.append(('names', default_pprint(self.names)))
39393931
if self.sortorder is not None:

pandas/tests/test_index.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -1686,7 +1686,7 @@ def test_get_indexer(self):
16861686
self.assertRaises(NotImplementedError, lambda : idx2.get_indexer(idx1, method='backfill'))
16871687
self.assertRaises(NotImplementedError, lambda : idx2.get_indexer(idx1, method='nearest'))
16881688

1689-
def test_repr(self):
1689+
def test_repr_roundtrip(self):
16901690

16911691
ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
16921692
str(ci)
@@ -4386,6 +4386,23 @@ def test_repr_with_unicode_data(self):
43864386
index = pd.DataFrame(d).set_index(["a", "b"]).index
43874387
self.assertFalse("\\u" in repr(index)) # we don't want unicode-escaped
43884388

4389+
def test_repr_roundtrip(self):
4390+
4391+
mi = MultiIndex.from_product([list('ab'),range(3)],names=['first','second'])
4392+
str(mi)
4393+
tm.assert_index_equal(eval(repr(mi)),mi,exact=True)
4394+
4395+
# formatting
4396+
if compat.PY3:
4397+
str(mi)
4398+
else:
4399+
compat.text_type(mi)
4400+
4401+
# long format
4402+
mi = MultiIndex.from_product([list('abcdefg'),range(10)],names=['first','second'])
4403+
result = str(mi)
4404+
tm.assert_index_equal(eval(repr(mi)),mi,exact=True)
4405+
43894406
def test_str(self):
43904407
# tested elsewhere
43914408
pass

0 commit comments

Comments
 (0)