diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 0e68cddde8bc7..fb9b9db428e34 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -182,15 +182,15 @@ on a deeper level. Defined Levels ~~~~~~~~~~~~~~ -The repr of a ``MultiIndex`` shows all the defined levels of an index, even +The :class:`MultiIndex` keeps all the defined levels of an index, even if they are not actually used. When slicing an index, you may notice this. For example: .. ipython:: python -   df.columns # original MultiIndex +   df.columns.levels # original MultiIndex - df[['foo','qux']].columns # sliced + df[['foo','qux']].columns.levels # sliced This is done to avoid a recomputation of the levels in order to make slicing highly performant. If you want to see only the used levels, you can use the @@ -210,7 +210,8 @@ To reconstruct the ``MultiIndex`` with only the used levels, the .. ipython:: python - df[['foo', 'qux']].columns.remove_unused_levels() + new_mi = df[['foo', 'qux']].columns.remove_unused_levels() + new_mi.levels Data alignment and using ``reindex`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 7d123697d3d20..3ccffdedcb895 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -74,6 +74,38 @@ a dict to a Series groupby aggregation (:ref:`whatsnew_0200.api_breaking.depreca See :ref:`_groupby.aggregate.named` for more. + +.. _whatsnew_0250.enhancements.multi_index_repr: + +Better repr for MultiIndex +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Printing of :class:`MultiIndex` instances now shows tuples of each row and ensures +that the tuple items are vertically aligned, so it's now easier to understand +the structure of the ``MultiIndex``. (:issue:`13480`): + +The repr now looks like this: + +.. 
ipython:: python + + pd.MultiIndex.from_product([['a', 'abc'], range(500)]) + +Previously, outputting a :class:`MultiIndex` printed all the ``levels`` and +``codes`` of the ``MultiIndex``, which was visually unappealing and made +the output more difficult to navigate. For example (limiting the range to 4): + +.. code-block:: ipython + + In [1]: pd.MultiIndex.from_product([['a', 'abc'], range(4)]) + Out[1]: MultiIndex(levels=[['a', 'abc'], [0, 1, 2, 3]], + ...: codes=[[0, 0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 3, 0, 1, 2, 3]]) + +In the new repr, all values will be shown, if the number of rows is smaller +than :attr:`options.display.max_seq_items` (default: 100 items). Horizontally, +the output will truncate, if it's wider than :attr:`options.display.width` +(default: 80 characters). + + .. _whatsnew_0250.enhancements.other: Other Enhancements diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4fb9c4197109f..cd90ab63fb83d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1322,16 +1322,23 @@ def set_names(self, names, level=None, inplace=False): >>> idx = pd.MultiIndex.from_product([['python', 'cobra'], ... [2018, 2019]]) >>> idx - MultiIndex(levels=[['cobra', 'python'], [2018, 2019]], - codes=[[1, 1, 0, 0], [0, 1, 0, 1]]) + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + ) >>> idx.set_names(['kind', 'year'], inplace=True) >>> idx - MultiIndex(levels=[['cobra', 'python'], [2018, 2019]], - codes=[[1, 1, 0, 0], [0, 1, 0, 1]], + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], names=['kind', 'year']) >>> idx.set_names('species', level=0) - MultiIndex(levels=[['cobra', 'python'], [2018, 2019]], - codes=[[1, 1, 0, 0], [0, 1, 0, 1]], + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], names=['species', 'year']) """ @@ -1393,12 +1400,16 @@ def rename(self, name, inplace=False): ... [2018, 2019]], ... 
names=['kind', 'year']) >>> idx - MultiIndex(levels=[['cobra', 'python'], [2018, 2019]], - codes=[[1, 1, 0, 0], [0, 1, 0, 1]], + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], names=['kind', 'year']) >>> idx.rename(['species', 'year']) - MultiIndex(levels=[['cobra', 'python'], [2018, 2019]], - codes=[[1, 1, 0, 0], [0, 1, 0, 1]], + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], names=['species', 'year']) >>> idx.rename('species') Traceback (most recent call last): @@ -5420,8 +5431,8 @@ def ensure_index_from_sequences(sequences, names=None): >>> ensure_index_from_sequences([['a', 'a'], ['a', 'b']], names=['L1', 'L2']) - MultiIndex(levels=[['a'], ['a', 'b']], - codes=[[0, 0], [0, 1]], + MultiIndex([('a', 'a'), + ('a', 'b')], names=['L1', 'L2']) See Also @@ -5461,8 +5472,9 @@ def ensure_index(index_like, copy=False): Index([('a', 'a'), ('b', 'c')], dtype='object') >>> ensure_index([['a', 'a'], ['b', 'c']]) - MultiIndex(levels=[['a'], ['b', 'c']], - codes=[[0, 0], [0, 1]]) + MultiIndex([('a', 'b'), + ('a', 'c')], + ) See Also -------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9217b388ce86b..0f457ba799928 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -29,7 +29,8 @@ from pandas.core.indexes.frozen import FrozenList, _ensure_frozen import pandas.core.missing as missing -from pandas.io.formats.printing import pprint_thing +from pandas.io.formats.printing import ( + format_object_attrs, format_object_summary, pprint_thing) _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( @@ -193,8 +194,10 @@ class MultiIndex(Index): >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) - MultiIndex(levels=[[1, 2], ['blue', 'red']], - codes=[[0, 0, 1, 1], [1, 0, 1, 0]], + MultiIndex([(1, 'red'), + (1, 'blue'), + (2, 
'red'), + (2, 'blue')], names=['number', 'color']) See further examples for how to construct a MultiIndex in the doc strings @@ -359,8 +362,10 @@ def from_arrays(cls, arrays, sortorder=None, names=None): -------- >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) - MultiIndex(levels=[[1, 2], ['blue', 'red']], - codes=[[0, 0, 1, 1], [1, 0, 1, 0]], + MultiIndex([(1, 'red'), + (1, 'blue'), + (2, 'red'), + (2, 'blue')], names=['number', 'color']) """ error_msg = "Input must be a list / sequence of array-likes." @@ -420,8 +425,10 @@ def from_tuples(cls, tuples, sortorder=None, names=None): >>> tuples = [(1, 'red'), (1, 'blue'), ... (2, 'red'), (2, 'blue')] >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color')) - MultiIndex(levels=[[1, 2], ['blue', 'red']], - codes=[[0, 0, 1, 1], [1, 0, 1, 0]], + MultiIndex([(1, 'red'), + (1, 'blue'), + (2, 'red'), + (2, 'blue')], names=['number', 'color']) """ if not is_list_like(tuples): @@ -477,8 +484,12 @@ def from_product(cls, iterables, sortorder=None, names=None): >>> colors = ['green', 'purple'] >>> pd.MultiIndex.from_product([numbers, colors], ... 
names=['number', 'color']) - MultiIndex(levels=[[0, 1, 2], ['green', 'purple']], - codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + MultiIndex([(0, 'green'), + (0, 'purple'), + (1, 'green'), + (1, 'purple'), + (2, 'green'), + (2, 'purple')], names=['number', 'color']) """ from pandas.core.arrays.categorical import _factorize_from_iterables @@ -537,15 +548,19 @@ def from_frame(cls, df, sortorder=None, names=None): 3 NJ Precip >>> pd.MultiIndex.from_frame(df) - MultiIndex(levels=[['HI', 'NJ'], ['Precip', 'Temp']], - codes=[[0, 0, 1, 1], [1, 0, 1, 0]], + MultiIndex([('HI', 'Temp'), + ('HI', 'Precip'), + ('NJ', 'Temp'), + ('NJ', 'Precip')], names=['a', 'b']) Using explicit names, instead of the column names >>> pd.MultiIndex.from_frame(df, names=['state', 'observation']) - MultiIndex(levels=[['HI', 'NJ'], ['Precip', 'Temp']], - codes=[[0, 0, 1, 1], [1, 0, 1, 0]], + MultiIndex([('HI', 'Temp'), + ('HI', 'Precip'), + ('NJ', 'Temp'), + ('NJ', 'Precip')], names=['state', 'observation']) """ if not isinstance(df, ABCDataFrame): @@ -663,21 +678,29 @@ def set_levels(self, levels, level=None, inplace=False, >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')], names=['foo', 'bar']) - >>> idx.set_levels([['a','b'], [1,2]]) - MultiIndex(levels=[['a', 'b'], [1, 2]], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + >>> idx.set_levels([['a', 'b'], [1, 2]]) + MultiIndex([('a', 1), + ('a', 2), + ('b', 1), + ('b', 2)], names=['foo', 'bar']) - >>> idx.set_levels(['a','b'], level=0) - MultiIndex(levels=[['a', 'b'], ['one', 'two']], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + >>> idx.set_levels(['a', 'b'], level=0) + MultiIndex([('a', 'one'), + ('a', 'two'), + ('b', 'one'), + ('b', 'two')], names=['foo', 'bar']) - >>> idx.set_levels(['a','b'], level='bar') - MultiIndex(levels=[[1, 2], ['a', 'b']], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + >>> idx.set_levels(['a', 'b'], level='bar') + MultiIndex([(1, 'a'), + (1, 'b'), + (2, 'a'), + (2, 'b')], names=['foo', 'bar']) - >>> 
idx.set_levels([['a','b'], [1,2]], level=[0,1]) - MultiIndex(levels=[['a', 'b'], [1, 2]], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + >>> idx.set_levels([['a', 'b'], [1, 2]], level=[0, 1]) + MultiIndex([('a', 1), + ('a', 2), + ('b', 1), + ('b', 2)], names=['foo', 'bar']) """ if is_list_like(levels) and not isinstance(levels, Index): @@ -779,24 +802,34 @@ def set_codes(self, codes, level=None, inplace=False, Examples -------- - >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), (1, 'two'), - (2, 'one'), (2, 'two')], + >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), + (1, 'two'), + (2, 'one'), + (2, 'two')], names=['foo', 'bar']) - >>> idx.set_codes([[1,0,1,0], [0,0,1,1]]) - MultiIndex(levels=[[1, 2], ['one', 'two']], - codes=[[1, 0, 1, 0], [0, 0, 1, 1]], + >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]]) + MultiIndex([(2, 'one'), + (1, 'one'), + (2, 'two'), + (1, 'two')], names=['foo', 'bar']) - >>> idx.set_codes([1,0,1,0], level=0) - MultiIndex(levels=[[1, 2], ['one', 'two']], - codes=[[1, 0, 1, 0], [0, 1, 0, 1]], + >>> idx.set_codes([1, 0, 1, 0], level=0) + MultiIndex([(2, 'one'), + (1, 'two'), + (2, 'one'), + (1, 'two')], names=['foo', 'bar']) - >>> idx.set_codes([0,0,1,1], level='bar') - MultiIndex(levels=[[1, 2], ['one', 'two']], - codes=[[0, 0, 1, 1], [0, 0, 1, 1]], + >>> idx.set_codes([0, 0, 1, 1], level='bar') + MultiIndex([(1, 'one'), + (1, 'one'), + (2, 'two'), + (2, 'two')], names=['foo', 'bar']) - >>> idx.set_codes([[1,0,1,0], [0,0,1,1]], level=[0,1]) - MultiIndex(levels=[[1, 2], ['one', 'two']], - codes=[[1, 0, 1, 0], [0, 0, 1, 1]], + >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1]) + MultiIndex([(2, 'one'), + (1, 'one'), + (2, 'two'), + (1, 'two')], names=['foo', 'bar']) """ if level is not None and not is_list_like(level): @@ -947,28 +980,25 @@ def _nbytes(self, deep=False): # -------------------------------------------------------------------- # Rendering Methods - - def _format_attrs(self): + def _formatter_func(self, tup): """ - Return a 
list of tuples of the (attr,formatted_value) + Formats each item in tup according to its level's formatter function. """ - attrs = [ - ('levels', ibase.default_pprint(self._levels, - max_seq_items=False)), - ('codes', ibase.default_pprint(self._codes, - max_seq_items=False))] - if com._any_not_none(*self.names): - attrs.append(('names', ibase.default_pprint(self.names))) - if self.sortorder is not None: - attrs.append(('sortorder', ibase.default_pprint(self.sortorder))) - return attrs - - def _format_space(self): - return "\n%s" % (' ' * (len(self.__class__.__name__) + 1)) + formatter_funcs = [level._formatter_func for level in self.levels] + return tuple(func(val) for func, val in zip(formatter_funcs, tup)) def _format_data(self, name=None): - # we are formatting thru the attributes - return None + """ + Return the formatted data as a unicode string + """ + return format_object_summary(self, self._formatter_func, + name=name, line_break_each_value=True) + + def _format_attrs(self): + """ + Return a list of tuples of the (attr,formatted_value). 
+ """ + return format_object_attrs(self, include_dtype=False) def _format_native_types(self, na_rep='nan', **kwargs): new_levels = [] @@ -1555,9 +1585,19 @@ def to_hierarchical(self, n_repeat, n_shuffle=1): >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]) >>> idx.to_hierarchical(3) - MultiIndex(levels=[[1, 2], ['one', 'two']], - codes=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) + MultiIndex([(1, 'one'), + (1, 'one'), + (1, 'one'), + (1, 'two'), + (1, 'two'), + (1, 'two'), + (2, 'one'), + (2, 'one'), + (2, 'one'), + (2, 'two'), + (2, 'two'), + (2, 'two')], + ) """ levels = self.levels codes = [np.repeat(level_codes, n_repeat) for @@ -1648,16 +1688,21 @@ def _sort_levels_monotonic(self): Examples -------- - >>> i = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) - >>> i - MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) - - >>> i.sort_monotonic() - MultiIndex(levels=[['a', 'b'], ['aa', 'bb']], - codes=[[0, 0, 1, 1], [1, 0, 1, 0]]) + >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], + ... 
codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + >>> mi + MultiIndex([('a', 'bb'), + ('a', 'aa'), + ('b', 'bb'), + ('b', 'aa')], + ) + >>> mi.sort_values() + MultiIndex([('a', 'aa'), + ('a', 'bb'), + ('b', 'aa'), + ('b', 'bb')], + ) """ if self.is_lexsorted() and self.is_monotonic: @@ -1706,20 +1751,25 @@ def remove_unused_levels(self): Examples -------- - >>> i = pd.MultiIndex.from_product([range(2), list('ab')]) - MultiIndex(levels=[[0, 1], ['a', 'b']], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + >>> mi = pd.MultiIndex.from_product([range(2), list('ab')]) + >>> mi + MultiIndex([(0, 'a'), + (0, 'b'), + (1, 'a'), + (1, 'b')], + ) - >>> i[2:] - MultiIndex(levels=[[0, 1], ['a', 'b']], - codes=[[1, 1], [0, 1]]) + >>> mi[2:] + MultiIndex([(1, 'a'), + (1, 'b')], + ) The 0 from the first level is not represented and can be removed - >>> i[2:].remove_unused_levels() - MultiIndex(levels=[[1], ['a', 'b']], - codes=[[0, 0], [0, 1]]) + >>> mi2 = mi[2:].remove_unused_levels() + >>> mi2.levels + FrozenList([[1], ['a', 'b']]) """ new_levels = [] @@ -2026,11 +2076,17 @@ def swaplevel(self, i=-2, j=-1): >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], ... 
codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) >>> mi - MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + MultiIndex([('a', 'bb'), + ('a', 'aa'), + ('b', 'bb'), + ('b', 'aa')], + ) >>> mi.swaplevel(0, 1) - MultiIndex(levels=[['bb', 'aa'], ['a', 'b']], - codes=[[0, 1, 0, 1], [0, 0, 1, 1]]) + MultiIndex([('bb', 'a'), + ('aa', 'a'), + ('bb', 'b'), + ('aa', 'b')], + ) """ new_levels = list(self.levels) new_codes = list(self.codes) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index bd756491abd2f..edfd3e7cf2fed 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2548,8 +2548,9 @@ def rsplit(self, pat=None, n=-1, expand=False): Which will create a MultiIndex: >>> idx.str.partition() - MultiIndex(levels=[['X', 'Y'], [' '], ['123', '999']], - codes=[[0, 1], [0, 0], [0, 1]]) + MultiIndex([('X', ' ', '123'), + ('Y', ' ', '999')], + ) Or an index with tuples with ``expand=False``: diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index bee66fcbfaa82..73d8586a0a8c9 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -265,7 +265,7 @@ class TableSchemaFormatter(BaseFormatter): def format_object_summary(obj, formatter, is_justify=True, name=None, - indent_for_name=True): + indent_for_name=True, line_break_each_value=False): """ Return the formatted obj as a unicode string @@ -282,6 +282,12 @@ def format_object_summary(obj, formatter, is_justify=True, name=None, indent_for_name : bool, default True Whether subsequent lines should be be indented to align with the name. + line_break_each_value : bool, default False + If True, inserts a line break for each value of ``obj``. + If False, only break lines when a line of values gets wider + than the display width. + + .. 
versionadded:: 0.25.0 Returns ------- @@ -306,7 +312,12 @@ def format_object_summary(obj, formatter, is_justify=True, name=None, space2 = "\n " # space for the opening '[' n = len(obj) - sep = ',' + if line_break_each_value: + # If we want to vertically align on each value of obj, we need to + # separate values by a line break and indent the values + sep = ',\n ' + ' ' * len(name) + else: + sep = ',' max_seq_items = get_option('display.max_seq_items') or n # are we a truncated display @@ -334,10 +345,10 @@ def best_len(values): if n == 0: summary = '[]{}'.format(close) - elif n == 1: + elif n == 1 and not line_break_each_value: first = formatter(obj[0]) summary = '[{}]{}'.format(first, close) - elif n == 2: + elif n == 2 and not line_break_each_value: first = formatter(obj[0]) last = formatter(obj[-1]) summary = '[{}, {}]{}'.format(first, last, close) @@ -353,21 +364,39 @@ def best_len(values): # adjust all values to max length if needed if is_justify: - - # however, if we are not truncated and we are only a single + if line_break_each_value: + # Justify each string in the values of head and tail, so the + # strings will right align when head and tail are stacked + # vertically. + head, tail = _justify(head, tail) + elif (is_truncated or not (len(', '.join(head)) < display_width and + len(', '.join(tail)) < display_width)): + # Each string in head and tail should align with each other + max_length = max(best_len(head), best_len(tail)) + head = [x.rjust(max_length) for x in head] + tail = [x.rjust(max_length) for x in tail] + # If we are not truncated and we are only a single # line, then don't justify - if (is_truncated or - not (len(', '.join(head)) < display_width and - len(', '.join(tail)) < display_width)): - max_len = max(best_len(head), best_len(tail)) - head = [x.rjust(max_len) for x in head] - tail = [x.rjust(max_len) for x in tail] + + if line_break_each_value: + # Now head and tail are of type List[Tuple[str]]. 
Below we + # convert them into List[str], so there will be one string per + # value. Also truncate items horizontally if wider than + # max_space + max_space = display_width - len(space2) + value = tail[0] + for max_items in reversed(range(1, len(value) + 1)): + pprinted_seq = _pprint_seq(value, max_seq_items=max_items) + if len(pprinted_seq) < max_space: + break + head = [_pprint_seq(x, max_seq_items=max_items) for x in head] + tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail] summary = "" line = space2 - for i in range(len(head)): - word = head[i] + sep + ' ' + for max_items in range(len(head)): + word = head[max_items] + sep + ' ' summary, line = _extend_line(summary, line, word, display_width, space2) @@ -376,8 +405,8 @@ def best_len(values): summary += line.rstrip() + space2 + '...' line = space2 - for i in range(len(tail) - 1): - word = tail[i] + sep + ' ' + for max_items in range(len(tail) - 1): + word = tail[max_items] + sep + ' ' summary, line = _extend_line(summary, line, word, display_width, space2) @@ -391,7 +420,7 @@ def best_len(values): close = ']' + close.rstrip(' ') summary += close - if len(summary) > (display_width): + if len(summary) > (display_width) or line_break_each_value: summary += space1 else: # one row summary += ' ' @@ -402,7 +431,44 @@ def best_len(values): return summary -def format_object_attrs(obj): +def _justify(head, tail): + """ + Justify items in head and tail, so they are right-aligned when stacked. + + Parameters + ---------- + head : list-like of list-likes of strings + tail : list-like of list-likes of strings + + Returns + ------- + tuple of list of tuples of strings + Same as head and tail, but items are right aligned when stacked + vertically. + + Examples + -------- + >>> _justify([['a', 'b']], [['abc', 'abcd']]) + ([(' a', ' b')], [('abc', 'abcd')]) + """ + combined = head + tail + + # For each position for the sequences in ``combined``, + # find the length of the largest string. 
+ max_length = [0] * len(combined[0]) + for inner_seq in combined: + length = [len(item) for item in inner_seq] + max_length = [max(x, y) for x, y in zip(max_length, length)] + + # justify each item in each list-like in head and tail using max_length + head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) + for seq in head] + tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) + for seq in tail] + return head, tail + + +def format_object_attrs(obj, include_dtype=True): """ Return a list of tuples of the (attr, formatted_value) for common attrs, including dtype, name, length @@ -411,6 +477,8 @@ def format_object_attrs(obj): ---------- obj : object must be iterable + include_dtype : bool + If False, dtype won't be in the returned list Returns ------- @@ -418,10 +486,12 @@ def format_object_attrs(obj): """ attrs = [] - if hasattr(obj, 'dtype'): + if hasattr(obj, 'dtype') and include_dtype: attrs.append(('dtype', "'{}'".format(obj.dtype))) if getattr(obj, 'name', None) is not None: attrs.append(('name', default_pprint(obj.name))) + elif getattr(obj, 'names', None) is not None and any(obj.names): + attrs.append(('names', default_pprint(obj.names))) max_seq_items = get_option('display.max_seq_items') or len(obj) if len(obj) > max_seq_items: attrs.append(('length', len(obj))) diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py index 956d2e6cc17e3..307772347e8f5 100644 --- a/pandas/tests/indexes/multi/conftest.py +++ b/pandas/tests/indexes/multi/conftest.py @@ -1,6 +1,7 @@ import numpy as np import pytest +import pandas as pd from pandas import Index, MultiIndex @@ -52,3 +53,28 @@ def holder(): def compat_props(): # a MultiIndex must have these properties associated with it return ['shape', 'ndim', 'size'] + + +@pytest.fixture +def narrow_multi_index(): + """ + Return a MultiIndex that is narrower than the display (<80 characters). 
+ """ + n = 1000 + ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n)) + dti = pd.date_range('2000-01-01', freq='s', periods=n * 2) + return pd.MultiIndex.from_arrays([ci, ci.codes + 9, dti], + names=['a', 'b', 'dti']) + + +@pytest.fixture +def wide_multi_index(): + """ + Return a MultiIndex that is wider than the display (>80 characters). + """ + n = 1000 + ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n)) + dti = pd.date_range('2000-01-01', freq='s', periods=n * 2) + levels = [ci, ci.codes + 9, dti, dti, dti] + names = ['a', 'b', 'dti_1', 'dti_2', 'dti_3'] + return pd.MultiIndex.from_arrays(levels, names=names) diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index c320cb32b856c..8315478d85125 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -55,31 +55,11 @@ def test_repr_with_unicode_data(): assert "\\" not in repr(index) # we don't want unicode-escaped -@pytest.mark.skip(reason="#22511 will remove this test") -def test_repr_roundtrip(): - +def test_repr_roundtrip_raises(): mi = MultiIndex.from_product([list('ab'), range(3)], names=['first', 'second']) - str(mi) - - tm.assert_index_equal(eval(repr(mi)), mi, exact=True) - - mi_u = MultiIndex.from_product( - [list('ab'), range(3)], names=['first', 'second']) - result = eval(repr(mi_u)) - tm.assert_index_equal(result, mi_u, exact=True) - - # formatting - str(mi) - - # long format - mi = MultiIndex.from_product([list('abcdefg'), range(10)], - names=['first', 'second']) - - tm.assert_index_equal(eval(repr(mi)), mi, exact=True) - - result = eval(repr(mi_u)) - tm.assert_index_equal(result, mi_u, exact=True) + with pytest.raises(TypeError): + eval(repr(mi)) def test_unicode_string_with_unicode(): @@ -94,3 +74,126 @@ def test_repr_max_seq_item_setting(idx): with pd.option_context("display.max_seq_items", None): repr(idx) assert '...' 
not in str(idx) + + +class TestRepr: + + def test_repr(self, idx): + result = idx[:1].__repr__() + expected = """\ +MultiIndex([('foo', 'one')], + names=['first', 'second'])""" + assert result == expected + + result = idx.__repr__() + expected = """\ +MultiIndex([('foo', 'one'), + ('foo', 'two'), + ('bar', 'one'), + ('baz', 'two'), + ('qux', 'one'), + ('qux', 'two')], + names=['first', 'second'])""" + assert result == expected + + with pd.option_context('display.max_seq_items', 5): + result = idx.__repr__() + expected = """\ +MultiIndex([('foo', 'one'), + ('foo', 'two'), + ... + ('qux', 'one'), + ('qux', 'two')], + names=['first', 'second'], length=6)""" + assert result == expected + + def test_rjust(self, narrow_multi_index): + mi = narrow_multi_index + result = mi[:1].__repr__() + expected = """\ +MultiIndex([('a', 9, '2000-01-01 00:00:00')], + names=['a', 'b', 'dti'])""" + assert result == expected + + result = mi[::500].__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00'), + ( 'a', 9, '2000-01-01 00:08:20'), + ('abc', 10, '2000-01-01 00:16:40'), + ('abc', 10, '2000-01-01 00:25:00')], + names=['a', 'b', 'dti'])""" + assert result == expected + + result = mi.__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00'), + ( 'a', 9, '2000-01-01 00:00:01'), + ( 'a', 9, '2000-01-01 00:00:02'), + ( 'a', 9, '2000-01-01 00:00:03'), + ( 'a', 9, '2000-01-01 00:00:04'), + ( 'a', 9, '2000-01-01 00:00:05'), + ( 'a', 9, '2000-01-01 00:00:06'), + ( 'a', 9, '2000-01-01 00:00:07'), + ( 'a', 9, '2000-01-01 00:00:08'), + ( 'a', 9, '2000-01-01 00:00:09'), + ... 
+ ('abc', 10, '2000-01-01 00:33:10'), + ('abc', 10, '2000-01-01 00:33:11'), + ('abc', 10, '2000-01-01 00:33:12'), + ('abc', 10, '2000-01-01 00:33:13'), + ('abc', 10, '2000-01-01 00:33:14'), + ('abc', 10, '2000-01-01 00:33:15'), + ('abc', 10, '2000-01-01 00:33:16'), + ('abc', 10, '2000-01-01 00:33:17'), + ('abc', 10, '2000-01-01 00:33:18'), + ('abc', 10, '2000-01-01 00:33:19')], + names=['a', 'b', 'dti'], length=2000)""" + assert result == expected + + def test_tuple_width(self, wide_multi_index): + mi = wide_multi_index + result = mi[:1].__repr__() + expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)], + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" + assert result == expected + + result = mi[:10].__repr__() + expected = """\ +MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), + ('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), + ('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...), + ('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...), + ('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...), + ('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...), + ('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...), + ('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), + ('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), + ('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)], + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" + assert result == expected + + result = mi.__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), + ( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), + ( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...), + ( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...), + ( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...), + ( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...), + ( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...), + 
( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), + ( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), + ( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...), + ... + ('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...), + ('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...), + ('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...), + ('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...), + ('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...), + ('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...), + ('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...), + ('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...), + ('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...), + ('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)], + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" # noqa + assert result == expected diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index ec9cbd104d751..445d9c4e482b0 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -10,8 +10,11 @@ def test_index_equal_levels_mismatch(): Index levels are different \\[left\\]: 1, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) -\\[right\\]: 2, MultiIndex\\(levels=\\[\\['A', 'B'\\], \\[1, 2, 3, 4\\]\\], - codes=\\[\\[0, 0, 1, 1\\], \\[0, 1, 2, 3\\]\\]\\)""" +\\[right\\]: 2, MultiIndex\\(\\[\\('A', 1\\), + \\('A', 2\\), + \\('B', 3\\), + \\('B', 4\\)\\], + \\)""" idx1 = Index([1, 2, 3]) idx2 = MultiIndex.from_tuples([("A", 1), ("A", 2),