changed according to comments

topper-123 · topper-123 · commit 025724eafca5 · 2018-08-31T15:15:38.000+01:00
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -176,14 +176,11 @@ understand the structure of the ``MultiIndex``. (:issue:`13480`):
 
 .. ipython:: python
 
-   index1=range(1000)
-   index2 = pd.Index(['a'] * 500 + ['abc'] * 500)
-   pd.MultiIndex.from_arrays([index1, index2])
+   pd.MultiIndex.from_product([['a', 'abc'], range(500)])
 
-For number of rows smaller than :attr:`options.display.max_seq_items`, all
-values will be shown (default: 100 items). Horizontally, the output will
+If the number of rows is smaller than :attr:`options.display.max_seq_items`,
+all values will be shown (default: 100 items). Horizontally, the output will
 truncate, if it's longer than :attr:`options.display.width` (default: 80 characters).
-This solves the problem with outputting large MultiIndex instances to the console.
 
 
 .. _whatsnew_0240.enhancements.other:
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -636,30 +636,7 @@ def _format_data(self, name=None):
         Return the formatted data as a unicode string
         """
         return format_object_summary(self, self._formatter_func,
-                                     name=name, is_multi=True)
-
-    def __unicode__(self):
-        """
-        Return a string representation for this MultiIndex.
-
-        Invoked by unicode(df) in py2 only. Yields a Unicode String in both
-        py2/py3.
-        """
-        klass = self.__class__.__name__
-        data = self._format_data()
-        attrs = self._format_attrs()
-        space = self._format_space()
-
-        prepr = (u(",%s") %
-                 space).join(u("%s=%s") % (k, v) for k, v in attrs)
-
-        # no data provided, just attributes
-        if data is None:
-            data = ''
-
-        res = u("%s(%s%s)") % (klass, data, prepr)
-
-        return res
+                                     name=name, line_break_each_value=True)
 
     def __len__(self):
         return len(self.labels[0])
diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py
@@ -269,7 +269,7 @@ class TableSchemaFormatter(BaseFormatter):
 
 
 def format_object_summary(obj, formatter, is_justify=True,
-                          name=None, is_multi=False):
+                          name=None, line_break_each_value=False):
     """
     Return the formatted obj as a unicode string
 
@@ -283,8 +283,10 @@ def format_object_summary(obj, formatter, is_justify=True,
         should justify the display
     name : name, optional
         defaults to the class name of the obj
-    is_multi : bool, default False
-        Is ``obj`` a :class:`MultiIndex` or not
+    line_break_each_value : bool, default False
+        If True, inserts a line break for each value of ``obj``.
+        If False, only break lines when a line of values gets wider
+        than the display width
 
     Returns
     -------
@@ -304,7 +306,7 @@ def format_object_summary(obj, formatter, is_justify=True,
     space2 = "\n%s" % (' ' * (len(name) + 2))
 
     n = len(obj)
-    sep = ',' if not is_multi else (',\n ' + ' ' * len(name))
+    sep = ',' if not line_break_each_value else (',\n ' + ' ' * len(name))
     max_seq_items = get_option('display.max_seq_items') or n
 
     # are we a truncated display
@@ -330,10 +332,10 @@ def best_len(values):
 
     if n == 0:
         summary = '[], '
-    elif n == 1 and not is_multi:
+    elif n == 1 and not line_break_each_value:
         first = formatter(obj[0])
         summary = '[%s], ' % first
-    elif n == 2 and not is_multi:
+    elif n == 2 and not line_break_each_value:
         first = formatter(obj[0])
         last = formatter(obj[-1])
         summary = '[%s, %s], ' % (first, last)
@@ -349,9 +351,15 @@ def best_len(values):
 
         # adjust all values to max length if needed
         if is_justify:
-            head, tail = _justify(head, tail, display_width, best_len,
-                                  is_truncated, is_multi)
-        if is_multi:
+            if line_break_each_value:
+                head, tail = _justify(head, tail)
+            elif (is_truncated or not (len(', '.join(head)) < display_width and
+                                       len(', '.join(tail)) < display_width)):
+                max_length = max(best_len(head), best_len(tail))
+                head = [x.rjust(max_length) for x in head]
+                tail = [x.rjust(max_length) for x in tail]
+
+        if line_break_each_value:
             max_space = display_width - len(space2)
             item = tail[0]
             for i in reversed(range(1, len(item) + 1)):
@@ -384,7 +392,7 @@ def best_len(values):
         summary += line
         summary += '],'
 
-        if len(summary) > (display_width) or is_multi:
+        if len(summary) > (display_width) or line_break_each_value:
             summary += space1
         else:  # one row
             summary += ' '
@@ -395,23 +403,38 @@ def best_len(values):
     return summary
 
 
-def _justify(head, tail, display_width, best_len,
-             is_truncated=False, is_multi=False):
+def _justify(head, tail):
     """
-    Justify each item in head and tail, so they align properly.
+    Justify each item in each list-like in head and tail, so each item
+    right-aligns when the two list-likes are stacked vertically.
+
+    Parameters
+    ----------
+    head : list-like of list-likes of strings
+    tail : list-like of list-likes of strings
+
+    Returns
+    -------
+    head : list of tuples of strings
+    tail : list of tuples of strings
+
+    Examples
+    --------
+    >>> _justify([['a', 'b']], [['abc', 'abcd']])
+    ([('  a', '   b')], [('abc', 'abcd')])
     """
-    if is_multi:
-        max_length = _max_level_item_length(head + tail)
-        head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
-                for seq in head]
-        tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
-                for seq in tail]
-    elif (is_truncated or not (len(', '.join(head)) < display_width and
-                               len(', '.join(tail)) < display_width)):
-        max_length = max(best_len(head), best_len(tail))
-        head = [x.rjust(max_length) for x in head]
-        tail = [x.rjust(max_length) for x in tail]
+    seq = head + tail  # type: List[str]
+    # For each position for the sequences in ``seq``, find the largest length.
+    max_length = [0] * len(seq[0])  # type: List[int]
+    for inner_seq in seq:
+        length = [len(item) for item in inner_seq]
+        max_length = [max(x, y) for x, y in zip(max_length, length)]
 
+    # justify each item in each list-like in head and tail using max_length
+    head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
+            for seq in head]
+    tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
+            for seq in tail]
     return head, tail
 
 
diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py
@@ -59,6 +59,13 @@ def test_repr_with_unicode_data():
         assert "\\u" not in repr(index)  # we don't want unicode-escaped
 
 
+def test_repr_roundtrip_raises():
+    mi = MultiIndex.from_product([list('ab'), range(3)],
+                                 names=['first', 'second'])
+    with pytest.raises(TypeError):
+        eval(repr(mi))
+
+
 def test_unicode_string_with_unicode():
     d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
     idx = pd.DataFrame(d).set_index(["a", "b"]).index