Skip to content

Commit 0fdbfd3

Browse files
committed
wip
1 parent ce62a5c commit 0fdbfd3

21 files changed

+213
-110
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
857857
- :meth:`DataFrame.stack` no longer converts to object dtype for DataFrames where each column has the same extension dtype. The output Series will have the same dtype as the columns (:issue:`23077`).
858858
- :meth:`Series.unstack` and :meth:`DataFrame.unstack` no longer convert extension arrays to object-dtype ndarrays. Each column in the output ``DataFrame`` will now have the same dtype as the input (:issue:`23077`).
859859
- Bug in :meth:`DataFrame.groupby` where aggregating on an ``ExtensionArray`` did not return the actual ``ExtensionArray`` dtype (:issue:`23227`).
860+
- A default repr for :class:`ExtensionArray` is now provided.
860861

861862
.. _whatsnew_0240.api.incompatibilities:
862863

pandas/core/arrays/base.py

+39-1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,13 @@ class ExtensionArray(object):
4949
5050
* _formatting_values
5151
52+
A default repr displaying the type, (truncated) data, length,
53+
and dtype is provided. It can be customized or replaced by
54+
overriding:
55+
56+
* _formatter
57+
* __repr__
58+
5259
Some methods require casting the ExtensionArray to an ndarray of Python
5360
objects with ``self.astype(object)``, which may be expensive. When
5461
performance is a concern, we highly recommend overriding the following
@@ -653,15 +660,46 @@ def copy(self, deep=False):
653660
raise AbstractMethodError(self)
654661

655662
# ------------------------------------------------------------------------
656-
# Block-related methods
663+
# Printing
657664
# ------------------------------------------------------------------------
665+
def __repr__(self):
666+
from pandas.io.formats.printing import format_object_summary
667+
668+
template = (
669+
'<{class_name}>\n'
670+
'{data}\n'
671+
'Length: {length}, dtype: {dtype}'
672+
)
673+
# the short repr has no trailing newline, while the truncated
674+
# repr does. So we include a newline in our template, and strip
675+
# any trailing newlines from format_object_summary
676+
data = format_object_summary(self, self._formatter, name=False,
677+
trailing_comma=False).rstrip('\n')
678+
name = self.__class__.__name__
679+
return template.format(class_name=name, data=data,
680+
length=len(self),
681+
dtype=self.dtype)
682+
683+
@property
684+
def _formatter(self):
685+
# type: () -> Callable[[Any], str]
686+
"""Formatting function for scalar values.
687+
688+
This is used in the default '__repr__'. The formatting function
689+
receives instances of your scalar type.
690+
"""
691+
return str
658692

659693
def _formatting_values(self):
660694
# type: () -> np.ndarray
661695
# At the moment, this has to be an array since we use result.dtype
662696
"""An array of values to be printed in, e.g. the Series repr"""
663697
return np.array(self)
664698

699+
# ------------------------------------------------------------------------
700+
# Reshaping
701+
# ------------------------------------------------------------------------
702+
665703
@classmethod
666704
def _concat_same_type(cls, to_concat):
667705
# type: (Sequence[ExtensionArray]) -> ExtensionArray

pandas/core/arrays/categorical.py

+2
Original file line numberDiff line numberDiff line change
@@ -1986,6 +1986,8 @@ def __unicode__(self):
19861986

19871987
return result
19881988

1989+
__repr__ = __unicode__
1990+
19891991
def _maybe_coerce_indexer(self, indexer):
19901992
""" return an indexer coerced to the codes dtype """
19911993
if isinstance(indexer, np.ndarray) and indexer.dtype.kind == 'i':

pandas/core/arrays/integer.py

+1-23
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from pandas._libs import lib
88
from pandas.util._decorators import cache_readonly
9-
from pandas.compat import u, range, string_types
9+
from pandas.compat import range, string_types
1010
from pandas.compat import set_function_name
1111

1212
from pandas.core import nanops
@@ -24,9 +24,6 @@
2424
from pandas.core.dtypes.dtypes import register_extension_dtype
2525
from pandas.core.dtypes.missing import isna, notna
2626

27-
from pandas.io.formats.printing import (
28-
format_object_summary, format_object_attrs, default_pprint)
29-
3027

3128
class _IntegerDtype(ExtensionDtype):
3229
"""
@@ -353,25 +350,6 @@ def __setitem__(self, key, value):
353350
def __len__(self):
354351
return len(self._data)
355352

356-
def __repr__(self):
357-
"""
358-
Return a string representation for this object.
359-
360-
Invoked by unicode(df) in py2 only. Yields a Unicode String in both
361-
py2/py3.
362-
"""
363-
klass = self.__class__.__name__
364-
data = format_object_summary(self, default_pprint, False)
365-
attrs = format_object_attrs(self)
366-
space = " "
367-
368-
prepr = (u(",%s") %
369-
space).join(u("%s=%s") % (k, v) for k, v in attrs)
370-
371-
res = u("%s(%s%s)") % (klass, data, prepr)
372-
373-
return res
374-
375353
@property
376354
def nbytes(self):
377355
return self._data.nbytes + self._mask.nbytes

pandas/core/arrays/interval.py

-10
Original file line numberDiff line numberDiff line change
@@ -835,16 +835,6 @@ def _format_data(self):
835835

836836
return summary
837837

838-
def __repr__(self):
839-
tpl = textwrap.dedent("""\
840-
{cls}({data},
841-
{lead}closed='{closed}',
842-
{lead}dtype='{dtype}')""")
843-
return tpl.format(cls=self.__class__.__name__,
844-
data=self._format_data(),
845-
lead=' ' * len(self.__class__.__name__) + ' ',
846-
closed=self.closed, dtype=self.dtype)
847-
848838
def _format_space(self):
849839
space = ' ' * (len(self.__class__.__name__) + 1)
850840
return "\n{space}".format(space=space)

pandas/core/arrays/period.py

-8
Original file line numberDiff line numberDiff line change
@@ -330,14 +330,6 @@ def start_time(self):
330330
def end_time(self):
331331
return self.to_timestamp(how='end')
332332

333-
def __repr__(self):
334-
return '<{}>\n{}\nLength: {}, dtype: {}'.format(
335-
self.__class__.__name__,
336-
[str(s) for s in self],
337-
len(self),
338-
self.dtype
339-
)
340-
341333
def __setitem__(
342334
self,
343335
key, # type: Union[int, Sequence[int], Sequence[bool]]

pandas/io/formats/printing.py

+29-8
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,9 @@ class TableSchemaFormatter(BaseFormatter):
271271
max_seq_items=max_seq_items)
272272

273273

274-
def format_object_summary(obj, formatter, is_justify=True, name=None):
274+
def format_object_summary(obj, formatter, is_justify=True, name=None,
275+
trailing_comma=True,
276+
truncated_trailing_newline=True):
275277
"""
276278
Return the formatted obj as a unicode string
277279
@@ -283,9 +285,14 @@ def format_object_summary(obj, formatter, is_justify=True, name=None):
283285
string formatter for an element
284286
is_justify : boolean
285287
should justify the display
286-
name : name, optiona
288+
name : name, optional
287289
defaults to the class name of the obj
288290
291+
Pass ``False`` to indicate that subsequent lines should
292+
not be indented to align with the name.
293+
trailing_comma : bool, default True
294+
Whether to include a comma after the closing ']'
295+
289296
Returns
290297
-------
291298
summary string
@@ -300,8 +307,13 @@ def format_object_summary(obj, formatter, is_justify=True, name=None):
300307
if name is None:
301308
name = obj.__class__.__name__
302309

303-
space1 = "\n%s" % (' ' * (len(name) + 1))
304-
space2 = "\n%s" % (' ' * (len(name) + 2))
310+
if name is False:
311+
space1 = "\n"
312+
space2 = "\n " # space for the opening '['
313+
else:
314+
name_len = len(name)
315+
space1 = "\n%s" % (' ' * (name_len + 1))
316+
space2 = "\n%s" % (' ' * (name_len + 2))
305317

306318
n = len(obj)
307319
sep = ','
@@ -328,15 +340,20 @@ def best_len(values):
328340
else:
329341
return 0
330342

343+
if trailing_comma:
344+
close = ', '
345+
else:
346+
close = ''
347+
331348
if n == 0:
332-
summary = '[], '
349+
summary = '[]{}'.format(close)
333350
elif n == 1:
334351
first = formatter(obj[0])
335-
summary = '[%s], ' % first
352+
summary = '[{}]{}'.format(first, close)
336353
elif n == 2:
337354
first = formatter(obj[0])
338355
last = formatter(obj[-1])
339-
summary = '[%s, %s], ' % (first, last)
356+
summary = '[{}, {}]{}'.format(first, last, close)
340357
else:
341358

342359
if n > max_seq_items:
@@ -381,7 +398,11 @@ def best_len(values):
381398
summary, line = _extend_line(summary, line, tail[-1],
382399
display_width - 2, space2)
383400
summary += line
384-
summary += '],'
401+
402+
# At this point close is either '' or ', '.
403+
# Append the closing ']' and keep the comma, if any, but drop its trailing space.
404+
close = ']' + close.rstrip(' ')
405+
summary += close
385406

386407
if len(summary) > (display_width):
387408
summary += space1

pandas/tests/arrays/interval/test_interval.py

+25-7
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# -*- coding: utf-8 -*-
22
import numpy as np
3-
import pytest
43

5-
from pandas import Index, IntervalIndex, date_range, timedelta_range
4+
from pandas import Index, date_range, option_context, timedelta_range
65
from pandas.core.arrays import IntervalArray
76
import pandas.util.testing as tm
7+
import pytest
88

99

1010
@pytest.fixture(params=[
@@ -65,8 +65,26 @@ def test_set_na(self, left_right_dtypes):
6565
tm.assert_extension_array_equal(result, expected)
6666

6767

68-
def test_repr_matches():
69-
idx = IntervalIndex.from_breaks([1, 2, 3])
70-
a = repr(idx)
71-
b = repr(idx.values)
72-
assert a.replace("Index", "Array") == b
68+
def test_repr_small():
69+
arr = IntervalArray.from_breaks([1, 2, 3])
70+
result = repr(arr)
71+
expected = (
72+
'<IntervalArray>\n'
73+
'[(1, 2], (2, 3]]\n'
74+
'Length: 2, dtype: interval[int64]'
75+
)
76+
assert result == expected
77+
78+
79+
def test_repr_large():
80+
arr = IntervalArray.from_breaks([1, 2, 3, 4, 5, 6])
81+
with option_context('display.max_seq_items', 2):
82+
result = repr(arr)
83+
expected = (
84+
'<IntervalArray>\n'
85+
'[(1, 2],\n'
86+
' ...\n'
87+
' (5, 6]] \n'
88+
'Length: 5, dtype: interval[int64]'
89+
)
90+
assert result == expected

pandas/tests/arrays/test_integer.py

+14-15
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# -*- coding: utf-8 -*-
22
import numpy as np
3-
import pytest
43

54
from pandas.core.dtypes.generic import ABCIndexClass
65

@@ -12,6 +11,7 @@
1211
UInt32Dtype, UInt64Dtype)
1312
from pandas.tests.extension.base import BaseOpsUtil
1413
import pandas.util.testing as tm
14+
import pytest
1515

1616

1717
def make_data():
@@ -57,24 +57,23 @@ def test_dtypes(dtype):
5757
assert dtype.name is not None
5858

5959

60-
class TestInterface(object):
60+
def test_repr_array(data):
61+
result = repr(data)
62+
assert '<IntegerArray>' in result
6163

62-
def test_repr_array(self, data):
63-
result = repr(data)
64+
# not long
65+
assert '...' not in result
66+
assert 'Length: ' in result
67+
assert 'dtype: ' in result
6468

65-
# not long
66-
assert '...' not in result
6769

68-
assert 'dtype=' in result
69-
assert 'IntegerArray' in result
70-
71-
def test_repr_array_long(self, data):
72-
# some arrays may be able to assert a ... in the repr
73-
with pd.option_context('display.max_seq_items', 1):
74-
result = repr(data)
70+
def test_repr_array_long(data):
71+
# some arrays may be able to assert a ... in the repr
72+
with pd.option_context('display.max_seq_items', 1):
73+
result = repr(data)
7574

76-
assert '...' in result
77-
assert 'length' in result
75+
assert '...' in result
76+
assert 'Length' in result
7877

7978

8079
class TestConstructors(object):

pandas/tests/arrays/test_period.py

+32-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import numpy as np
2-
import pytest
32

43
from pandas._libs.tslibs import iNaT
54
from pandas._libs.tslibs.period import IncompatibleFrequency
@@ -10,6 +9,7 @@
109
import pandas as pd
1110
from pandas.core.arrays import PeriodArray, period_array
1211
import pandas.util.testing as tm
12+
import pytest
1313

1414
# ----------------------------------------------------------------------------
1515
# Constructors
@@ -195,3 +195,34 @@ def tet_sub_period():
195195
other = pd.Period("2000", freq="M")
196196
with tm.assert_raises_regex(IncompatibleFrequency, "freq"):
197197
arr - other
198+
199+
200+
# ----------------------------------------------------------------------------
201+
# Printing
202+
203+
def test_repr_small():
204+
arr = period_array(['2000', '2001'], freq='D')
205+
result = str(arr)
206+
expected = (
207+
'<PeriodArray>\n'
208+
'[2000-01-01, 2001-01-01]\n'
209+
'Length: 2, dtype: period[D]'
210+
)
211+
assert result == expected
212+
213+
214+
def test_repr_large():
215+
arr = period_array(['2000', '2001'] * 500, freq='D')
216+
result = str(arr)
217+
expected = (
218+
'<PeriodArray>\n'
219+
'[2000-01-01, 2001-01-01, 2000-01-01, 2001-01-01, 2000-01-01, '
220+
'2001-01-01,\n' # continuation
221+
' 2000-01-01, 2001-01-01, 2000-01-01, 2001-01-01,\n'
222+
' ...\n'
223+
' 2000-01-01, 2001-01-01, 2000-01-01, 2001-01-01, 2000-01-01, '
224+
'2001-01-01,\n' # continuation
225+
' 2000-01-01, 2001-01-01, 2000-01-01, 2001-01-01]\n'
226+
'Length: 1000, dtype: period[D]'
227+
)
228+
assert result == expected

pandas/tests/extension/base/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ class TestMyDtype(BaseDtypeTests):
4848
from .interface import BaseInterfaceTests # noqa
4949
from .methods import BaseMethodsTests # noqa
5050
from .ops import BaseArithmeticOpsTests, BaseComparisonOpsTests, BaseOpsUtil # noqa
51+
from .printing import BasePrintingTests # noqa
5152
from .reduce import BaseNoReduceTests, BaseNumericReduceTests, BaseBooleanReduceTests # noqa
5253
from .missing import BaseMissingTests # noqa
5354
from .reshaping import BaseReshapingTests # noqa

0 commit comments

Comments
 (0)