pandas-dev · jreback · Dec 4, 2018 · Nov 9, 2018 · Nov 9, 2018 · Nov 9, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -857,6 +857,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
 - :meth:`DataFrame.stack` no longer converts to object dtype for DataFrames where each column has the same extension dtype. The output Series will have the same dtype as the columns (:issue:`23077`).
 - :meth:`Series.unstack` and :meth:`DataFrame.unstack` no longer convert extension arrays to object-dtype ndarrays. Each column in the output ``DataFrame`` will now have the same dtype as the input (:issue:`23077`).
 - Bug when grouping :meth:`Dataframe.groupby()` and aggregating on ``ExtensionArray`` it was not returning the actual ``ExtensionArray`` dtype (:issue:`23227`).
+- A default repr for ``ExtensionArray`` is now provided.
 
 .. _whatsnew_0240.api.incompatibilities:
 

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -49,6 +49,13 @@ class ExtensionArray(object):
 
     * _formatting_values
 
+    A default repr displaying the type, (truncated) data, length,
+    and dtype is provided. It can be customized or replaced
+    by overriding:
+
+    * _formatter
+    * __repr__
+
     Some methods require casting the ExtensionArray to an ndarray of Python
     objects with ``self.astype(object)``, which may be expensive. When
     performance is a concern, we highly recommend overriding the following
@@ -653,15 +660,46 @@ def copy(self, deep=False):
         raise AbstractMethodError(self)
 
     # ------------------------------------------------------------------------
-    # Block-related methods
+    # Printing
     # ------------------------------------------------------------------------
+    def __repr__(self):
+        from pandas.io.formats.printing import format_object_summary
+
+        template = (
+            '<{class_name}>\n'
+            '{data}\n'
+            'Length: {length}, dtype: {dtype}'
+        )
+        # the short repr has no trailing newline, while the truncated
+        # repr does. So we include a newline in our template, and strip
+        # any trailing newlines from format_object_summary
+        data = format_object_summary(self, self._formatter, name=False,
+                                     trailing_comma=False).rstrip('\n')
+        name = self.__class__.__name__
+        return template.format(class_name=name, data=data,
+                               length=len(self),
+                               dtype=self.dtype)
+
+    @property
+    def _formatter(self):
+        # type: () -> Callable[[Any], str]
+        """Formatting function for scalar values.
+
+        This is used in the default '__repr__'. The formatting function
+        receives instances of your scalar type.
+        """
+        return str
 
     def _formatting_values(self):
         # type: () -> np.ndarray
         # At the moment, this has to be an array since we use result.dtype
         """An array of values to be printed in, e.g. the Series repr"""
         return np.array(self)
 
+    # ------------------------------------------------------------------------
+    # Reshaping
+    # ------------------------------------------------------------------------
+
     @classmethod
     def _concat_same_type(cls, to_concat):
         # type: (Sequence[ExtensionArray]) -> ExtensionArray

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1986,6 +1986,8 @@ def __unicode__(self):
 
         return result
 
+    __repr__ = __unicode__
+
     def _maybe_coerce_indexer(self, indexer):
         """ return an indexer coerced to the codes dtype """
         if isinstance(indexer, np.ndarray) and indexer.dtype.kind == 'i':

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -6,7 +6,7 @@
 
 from pandas._libs import lib
 from pandas.util._decorators import cache_readonly
-from pandas.compat import u, range, string_types
+from pandas.compat import range, string_types
 from pandas.compat import set_function_name
 
 from pandas.core import nanops
@@ -24,9 +24,6 @@
 from pandas.core.dtypes.dtypes import register_extension_dtype
 from pandas.core.dtypes.missing import isna, notna
 
-from pandas.io.formats.printing import (
-    format_object_summary, format_object_attrs, default_pprint)
-
 
 class _IntegerDtype(ExtensionDtype):
     """
@@ -353,25 +350,6 @@ def __setitem__(self, key, value):
     def __len__(self):
         return len(self._data)
 
-    def __repr__(self):
-        """
-        Return a string representation for this object.
-
-        Invoked by unicode(df) in py2 only. Yields a Unicode String in both
-        py2/py3.
-        """
-        klass = self.__class__.__name__
-        data = format_object_summary(self, default_pprint, False)
-        attrs = format_object_attrs(self)
-        space = " "
-
-        prepr = (u(",%s") %
-                 space).join(u("%s=%s") % (k, v) for k, v in attrs)
-
-        res = u("%s(%s%s)") % (klass, data, prepr)
-
-        return res
-
     @property
     def nbytes(self):
         return self._data.nbytes + self._mask.nbytes

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -835,16 +835,6 @@ def _format_data(self):
 
         return summary
 
-    def __repr__(self):
-        tpl = textwrap.dedent("""\
-        {cls}({data},
-        {lead}closed='{closed}',
-        {lead}dtype='{dtype}')""")
-        return tpl.format(cls=self.__class__.__name__,
-                          data=self._format_data(),
-                          lead=' ' * len(self.__class__.__name__) + ' ',
-                          closed=self.closed, dtype=self.dtype)
-
     def _format_space(self):
         space = ' ' * (len(self.__class__.__name__) + 1)
         return "\n{space}".format(space=space)

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -330,14 +330,6 @@ def start_time(self):
     def end_time(self):
         return self.to_timestamp(how='end')
 
-    def __repr__(self):
-        return '<{}>\n{}\nLength: {}, dtype: {}'.format(
-            self.__class__.__name__,
-            [str(s) for s in self],
-            len(self),
-            self.dtype
-        )
-
     def __setitem__(
             self,
             key,   # type: Union[int, Sequence[int], Sequence[bool]]

diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py
@@ -271,7 +271,9 @@ class TableSchemaFormatter(BaseFormatter):
                  max_seq_items=max_seq_items)
 
 
-def format_object_summary(obj, formatter, is_justify=True, name=None):
+def format_object_summary(obj, formatter, is_justify=True, name=None,
+                          trailing_comma=True,
+                          truncated_trailing_newline=True):
     """
     Return the formatted obj as a unicode string
 
@@ -283,9 +285,14 @@ def format_object_summary(obj, formatter, is_justify=True, name=None):
         string formatter for an element
     is_justify : boolean
         should justify the display
-    name : name, optiona
+    name : name, optional
         defaults to the class name of the obj
 
+        Pass ``False`` to indicate that subsequent lines should
+        not be indented to align with the name.
+    trailing_comma : bool, default True
+        Whether to include a comma after the closing ']'
+
     Returns
     -------
     summary string
@@ -300,8 +307,13 @@ def format_object_summary(obj, formatter, is_justify=True, name=None):
     if name is None:
         name = obj.__class__.__name__
 
-    space1 = "\n%s" % (' ' * (len(name) + 1))
-    space2 = "\n%s" % (' ' * (len(name) + 2))
+    if name is False:
+        space1 = "\n"
+        space2 = "\n "  # space for the opening '['
+    else:
+        name_len = len(name)
+        space1 = "\n%s" % (' ' * (name_len + 1))
+        space2 = "\n%s" % (' ' * (name_len + 2))
 
     n = len(obj)
     sep = ','
@@ -328,15 +340,20 @@ def best_len(values):
         else:
             return 0
 
+    if trailing_comma:
+        close = ', '
+    else:
+        close = ''
+
     if n == 0:
-        summary = '[], '
+        summary = '[]{}'.format(close)
     elif n == 1:
         first = formatter(obj[0])
-        summary = '[%s], ' % first
+        summary = '[{}]{}'.format(first, close)
     elif n == 2:
         first = formatter(obj[0])
         last = formatter(obj[-1])
-        summary = '[%s, %s], ' % (first, last)
+        summary = '[{}, {}]{}'.format(first, last, close)
     else:
 
         if n > max_seq_items:
@@ -381,7 +398,11 @@ def best_len(values):
         summary, line = _extend_line(summary, line, tail[-1],
                                      display_width - 2, space2)
         summary += line
-        summary += '],'
+
+        # At this point ``close`` is either '' or ', '.
+        # Prepend the ']' and drop the trailing space, if any.
+        close = ']' + close.rstrip(' ')
+        summary += close
 
         if len(summary) > (display_width):
             summary += space1

diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py
@@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-
 import numpy as np
-import pytest
 
-from pandas import Index, IntervalIndex, date_range, timedelta_range
+from pandas import Index, date_range, option_context, timedelta_range
 from pandas.core.arrays import IntervalArray
 import pandas.util.testing as tm
+import pytest
 
 
 @pytest.fixture(params=[
@@ -65,8 +65,26 @@ def test_set_na(self, left_right_dtypes):
         tm.assert_extension_array_equal(result, expected)
 
 
-def test_repr_matches():
-    idx = IntervalIndex.from_breaks([1, 2, 3])
-    a = repr(idx)
-    b = repr(idx.values)
-    assert a.replace("Index", "Array") == b
+def test_repr_small():
+    arr = IntervalArray.from_breaks([1, 2, 3])
+    result = repr(arr)
+    expected = (
+        '<IntervalArray>\n'
+        '[(1, 2], (2, 3]]\n'
+        'Length: 2, dtype: interval[int64]'
+    )
+    assert result == expected
+
+
+def test_repr_large():
+    arr = IntervalArray.from_breaks([1, 2, 3, 4, 5, 6])
+    with option_context('display.max_seq_items', 2):
+        result = repr(arr)
+    expected = (
+        '<IntervalArray>\n'
+        '[(1, 2],\n'
+        ' ...\n'
+        ' (5, 6]] \n'
+        'Length: 5, dtype: interval[int64]'
+    )
+    assert result == expected
diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 import numpy as np
-import pytest
 
 from pandas.core.dtypes.generic import ABCIndexClass
 
@@ -12,6 +11,7 @@
     UInt32Dtype, UInt64Dtype)
 from pandas.tests.extension.base import BaseOpsUtil
 import pandas.util.testing as tm
+import pytest
 
 
 def make_data():
@@ -57,24 +57,23 @@ def test_dtypes(dtype):
     assert dtype.name is not None
 
 
-class TestInterface(object):
+def test_repr_array(data):
+    result = repr(data)
+    assert '<IntegerArray>' in result
 
-    def test_repr_array(self, data):
-        result = repr(data)
+    # not long
+    assert '...' not in result
+    assert 'Length: ' in result
+    assert 'dtype: ' in result
 
-        # not long
-        assert '...' not in result
 
-        assert 'dtype=' in result
-        assert 'IntegerArray' in result
-
-    def test_repr_array_long(self, data):
-        # some arrays may be able to assert a ... in the repr
-        with pd.option_context('display.max_seq_items', 1):
-            result = repr(data)
+def test_repr_array_long(data):
+    # some arrays may be able to assert a ... in the repr
+    with pd.option_context('display.max_seq_items', 1):
+        result = repr(data)
 
-            assert '...' in result
-            assert 'length' in result
+        assert '...' in result
+        assert 'Length' in result
 
 
 class TestConstructors(object):

diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py
@@ -1,5 +1,4 @@
 import numpy as np
-import pytest
 
 from pandas._libs.tslibs import iNaT
 from pandas._libs.tslibs.period import IncompatibleFrequency
@@ -10,6 +9,7 @@
 import pandas as pd
 from pandas.core.arrays import PeriodArray, period_array
 import pandas.util.testing as tm
+import pytest
 
 # ----------------------------------------------------------------------------
 # Constructors
@@ -195,3 +195,34 @@ def tet_sub_period():
     other = pd.Period("2000", freq="M")
     with tm.assert_raises_regex(IncompatibleFrequency, "freq"):
         arr - other
+
+
+# ----------------------------------------------------------------------------
+# Printing
+
+def test_repr_small():
+    arr = period_array(['2000', '2001'], freq='D')
+    result = str(arr)
+    expected = (
+        '<PeriodArray>\n'
+        '[2000-01-01, 2001-01-01]\n'
+        'Length: 2, dtype: period[D]'
+    )
+    assert result == expected
+
+
+def test_repr_large():
+    arr = period_array(['2000', '2001'] * 500, freq='D')
+    result = str(arr)
+    expected = (
+        '<PeriodArray>\n'
+        '[2000-01-01, 2001-01-01, 2000-01-01, 2001-01-01, 2000-01-01, '
+        '2001-01-01,\n'  # continuation
+        ' 2000-01-01, 2001-01-01, 2000-01-01, 2001-01-01,\n'
+        ' ...\n'
+        ' 2000-01-01, 2001-01-01, 2000-01-01, 2001-01-01, 2000-01-01, '
+        '2001-01-01,\n'  # continuation
+        ' 2000-01-01, 2001-01-01, 2000-01-01, 2001-01-01]\n'
+        'Length: 1000, dtype: period[D]'
+    )
+    assert result == expected
diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py
@@ -48,6 +48,7 @@ class TestMyDtype(BaseDtypeTests):
 from .interface import BaseInterfaceTests  # noqa
 from .methods import BaseMethodsTests  # noqa
 from .ops import BaseArithmeticOpsTests, BaseComparisonOpsTests, BaseOpsUtil  # noqa
+from .printing import BasePrintingTests  # noqa
 from .reduce import BaseNoReduceTests, BaseNumericReduceTests, BaseBooleanReduceTests  # noqa
 from .missing import BaseMissingTests  # noqa
 from .reshaping import BaseReshapingTests  # noqa