Fix repr of DataFrame with IntervalIndex (pandas-dev#24134)

jorisvandenbossche · Pingviinituutti · commit c13fcd1f19bf · 2019-02-28T10:19:13.000+02:00
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -1015,10 +1015,11 @@ def _format_with_header(self, header, **kwargs):
 
     def _format_native_types(self, na_rep='', quoting=None, **kwargs):
         """ actually format my specific types """
-        from pandas.io.formats.format import IntervalArrayFormatter
-        return IntervalArrayFormatter(values=self,
-                                      na_rep=na_rep,
-                                      justify='all').get_result()
+        from pandas.io.formats.format import ExtensionArrayFormatter
+        return ExtensionArrayFormatter(values=self,
+                                       na_rep=na_rep,
+                                       justify='all',
+                                       leading_space=False).get_result()
 
     def _format_data(self, name=None):
 
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -840,7 +840,34 @@ def _get_column_name_list(self):
 
 
 def format_array(values, formatter, float_format=None, na_rep='NaN',
-                 digits=None, space=None, justify='right', decimal='.'):
+                 digits=None, space=None, justify='right', decimal='.',
+                 leading_space=None):
+    """
+    Format an array for printing.
+
+    Parameters
+    ----------
+    values
+    formatter
+    float_format
+    na_rep
+    digits
+    space
+    justify
+    decimal
+    leading_space : bool, optional
+        Whether the array should be formatted with a leading space.
+        When an array as a column of a Series or DataFrame, we do want
+        the leading space to pad between columns.
+
+        When formatting an Index subclass
+        (e.g. IntervalIndex._format_native_types), we don't want the
+        leading space since it should be left-aligned.
+
+    Returns
+    -------
+    List[str]
+    """
 
     if is_datetime64_dtype(values.dtype):
         fmt_klass = Datetime64Formatter
@@ -868,7 +895,8 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
 
     fmt_obj = fmt_klass(values, digits=digits, na_rep=na_rep,
                         float_format=float_format, formatter=formatter,
-                        space=space, justify=justify, decimal=decimal)
+                        space=space, justify=justify, decimal=decimal,
+                        leading_space=leading_space)
 
     return fmt_obj.get_result()
 
@@ -877,7 +905,7 @@ class GenericArrayFormatter(object):
 
     def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
                  space=12, float_format=None, justify='right', decimal='.',
-                 quoting=None, fixed_width=True):
+                 quoting=None, fixed_width=True, leading_space=None):
         self.values = values
         self.digits = digits
         self.na_rep = na_rep
@@ -888,6 +916,7 @@ def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
         self.decimal = decimal
         self.quoting = quoting
         self.fixed_width = fixed_width
+        self.leading_space = leading_space
 
     def get_result(self):
         fmt_values = self._format_strings()
@@ -927,7 +956,9 @@ def _format(x):
             vals = vals.values
 
         is_float_type = lib.map_infer(vals, is_float) & notna(vals)
-        leading_space = is_float_type.any()
+        leading_space = self.leading_space
+        if leading_space is None:
+            leading_space = is_float_type.any()
 
         fmt_values = []
         for i, v in enumerate(vals):
@@ -936,7 +967,13 @@ def _format(x):
             elif is_float_type[i]:
                 fmt_values.append(float_format(v))
             else:
-                fmt_values.append(u' {v}'.format(v=_format(v)))
+                if leading_space is False:
+                    # False specifically, so that the default is
+                    # to include a space if we get here.
+                    tpl = u'{v}'
+                else:
+                    tpl = u' {v}'
+                fmt_values.append(tpl.format(v=_format(v)))
 
         return fmt_values
 
@@ -1135,7 +1172,8 @@ def _format_strings(self):
                                   formatter,
                                   float_format=self.float_format,
                                   na_rep=self.na_rep, digits=self.digits,
-                                  space=self.space, justify=self.justify)
+                                  space=self.space, justify=self.justify,
+                                  leading_space=self.leading_space)
         return fmt_values
 
 
diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py
@@ -377,7 +377,21 @@ def test_repr_max_seq_item_setting(self):
     def test_repr_roundtrip(self):
         super(TestIntervalIndex, self).test_repr_roundtrip()
 
-    # TODO: check this behavior is consistent with test_interval_new.py
+    def test_frame_repr(self):
+        # https://github.com/pandas-dev/pandas/pull/24134/files
+        df = pd.DataFrame({'A': [1, 2, 3, 4]},
+                          index=pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4]))
+        result = repr(df)
+        expected = (
+            '        A\n'
+            '(0, 1]  1\n'
+            '(1, 2]  2\n'
+            '(2, 3]  3\n'
+            '(3, 4]  4'
+        )
+        assert result == expected
+
+        # TODO: check this behavior is consistent with test_interval_new.py
     def test_get_item(self, closed):
         i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan),
                                       closed=closed)
diff --git a/pandas/tests/indexes/period/test_formats.py b/pandas/tests/indexes/period/test_formats.py
@@ -49,6 +49,18 @@ def test_to_native_types():
 
 
 class TestPeriodIndexRendering(object):
+
+    def test_frame_repr(self):
+        df = pd.DataFrame({"A": [1, 2, 3]},
+                          index=pd.date_range('2000', periods=3))
+        result = repr(df)
+        expected = (
+            '            A\n'
+            '2000-01-01  1\n'
+            '2000-01-02  2\n'
+            '2000-01-03  3')
+        assert result == expected
+
     @pytest.mark.parametrize('method', ['__repr__', '__unicode__', '__str__'])
     def test_representation(self, method):
         # GH#7601
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
@@ -812,6 +812,13 @@ def test_equals_categoridcal_unordered(self):
         assert not a.equals(c)
         assert not b.equals(c)
 
+    def test_frame_repr(self):
+        df = pd.DataFrame({"A": [1, 2, 3]},
+                          index=pd.CategoricalIndex(['a', 'b', 'c']))
+        result = repr(df)
+        expected = '   A\na  1\nb  2\nc  3'
+        assert result == expected
+
     def test_string_categorical_index_repr(self):
         # short
         idx = pd.CategoricalIndex(['a', 'bb', 'ccc'])