From 5d023cc96feca4d00b21f089c9a2af4332dd81ca Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 20 Jun 2019 13:55:08 +0100 Subject: [PATCH 1/3] REF: remove special-casing for internal EAs from format_array --- pandas/core/arrays/base.py | 2 +- pandas/core/arrays/categorical.py | 3 + pandas/core/arrays/datetimelike.py | 10 +- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/indexes/interval.py | 11 ++- pandas/io/formats/format.py | 91 +++++++------------ pandas/tests/frame/test_dtypes.py | 8 +- pandas/tests/frame/test_repr_info.py | 12 +-- .../tests/indexes/datetimes/test_formats.py | 12 +-- pandas/tests/io/formats/test_format.py | 24 ++--- pandas/tests/series/test_repr.py | 80 ++++++++-------- 11 files changed, 120 insertions(+), 135 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index c709cd9e9f0b2..0a9f47705990a 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -881,7 +881,7 @@ def _formatter( ``boxed=True``. """ if boxed: - return str + return None return repr def _formatting_values(self) -> np.ndarray: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c079b860bb924..2cf6173f65a76 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -457,6 +457,9 @@ def _formatter(self, boxed=False): # Defer to CategoricalFormatter's formatter. return None + def _formatting_values(self): + return self.get_values() + def copy(self): """ Copy constructor. diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ebf1f692ccde6..8138bb2d6ef80 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -385,8 +385,14 @@ def _format_native_types(self, na_rep='NaT', date_format=None): raise AbstractMethodError(self) def _formatter(self, boxed=False): - # TODO: Remove Datetime & DatetimeTZ formatters. - return "'{}'".format + from pandas.io.formats.format import ( + _is_dates_only, _get_format_datetime64) + if boxed: + values = self.values.astype(object) + is_dates_only = _is_dates_only(values) + return _get_format_datetime64(is_dates_only) + else: + return "'{}'".format # ---------------------------------------------------------------- # Array-Like / EA-Interface Methods diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 50bc8d6d3ae6b..23b1cba7450d7 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -363,7 +363,7 @@ def astype(self, dtype, copy=True): def _formatter(self, boxed=False): from pandas.io.formats.format import _get_format_timedelta64 - return _get_format_timedelta64(self, box=True) + return _get_format_timedelta64(self, box=not boxed) def _format_native_types(self, na_rep='NaT', date_format=None): from pandas.io.formats.format import _get_format_timedelta64 diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 896935fa72adb..8bc6e4e62a949 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1061,11 +1061,12 @@ def _format_with_header(self, header, **kwargs): def _format_native_types(self, na_rep='NaN', quoting=None, **kwargs): """ actually format my specific types """ - from pandas.io.formats.format import ExtensionArrayFormatter - return ExtensionArrayFormatter(values=self, - na_rep=na_rep, - justify='all', - leading_space=False).get_result() + from pandas.io.formats.format import format_array + return format_array(values=self, + formatter=None, + na_rep=na_rep, + justify='all', + leading_space=False) def _format_data(self, name=None): diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 8655fb05f34e2..1a95a193c0044 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -17,17 +17,15 @@ from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT from pandas.core.dtypes.common import ( - is_categorical_dtype, is_complex_dtype, is_datetime64_dtype, - is_datetime64tz_dtype, is_extension_array_dtype, is_float, is_float_dtype, - is_integer, is_integer_dtype, is_list_like, is_numeric_dtype, is_scalar, - is_timedelta64_dtype) -from pandas.core.dtypes.generic import ( - ABCIndexClass, ABCMultiIndex, ABCSeries, ABCSparseArray) + is_categorical_dtype, is_complex_dtype, is_datetime64_dtype, is_float, + is_float_dtype, is_integer, is_integer_dtype, is_list_like, + is_numeric_dtype, is_scalar, is_timedelta64_dtype) +from pandas.core.dtypes.generic import ABCIndexClass, ABCMultiIndex from pandas.core.dtypes.missing import isna, notna from pandas.core.base import PandasObject import pandas.core.common as com -from pandas.core.index import Index, ensure_index +from pandas.core.index import ensure_index from pandas.core.indexes.datetimes import DatetimeIndex from pandas.io.common import _expand_user, _stringify_path @@ -248,8 +246,8 @@ def _get_formatted_index(self): return fmt_index, have_header def _get_formatted_values(self): - values_to_format = self.tr_series._formatting_values() - return format_array(values_to_format, None, + values = self.tr_series + return format_array(values, formatter=None, float_format=self.float_format, na_rep=self.na_rep) def to_string(self): @@ -713,10 +711,9 @@ def to_latex(self, column_format=None, longtable=False, encoding=None, 'method') def _format_col(self, i): - frame = self.tr_frame + values = self.tr_frame.iloc[:, i] formatter = self._get_formatter(i) - values_to_format = frame.iloc[:, i]._formatting_values() - return format_array(values_to_format, formatter, + return format_array(values, formatter=formatter, float_format=self.float_format, na_rep=self.na_rep, space=self.col_space, decimal=self.decimal) @@ -883,14 +880,34 @@ def format_array(values, formatter, float_format=None, na_rep='NaN', List[str] """ + def _get_formatted_values(values): + + if isinstance(values, ABCIndexClass): + values = values._values + + try: + formatter = values._formatter(boxed=True) + except AttributeError: + formatter = None + + def _format_values(values): + if formatter is None: + return values + else: + return np.array([formatter(x) for x in values]) + + try: + values = values._formatting_values() + return _format_values(_get_formatted_values(values)) + except AttributeError: + return _format_values(values) + + values = _get_formatted_values(values) + if is_datetime64_dtype(values.dtype): fmt_klass = Datetime64Formatter - elif is_datetime64tz_dtype(values): - fmt_klass = Datetime64TZFormatter elif is_timedelta64_dtype(values.dtype): fmt_klass = Timedelta64Formatter - elif is_extension_array_dtype(values.dtype): - fmt_klass = ExtensionArrayFormatter elif is_float_dtype(values.dtype) or is_complex_dtype(values.dtype): fmt_klass = FloatArrayFormatter elif is_integer_dtype(values.dtype): @@ -970,10 +987,6 @@ def _format(x): return '{x}'.format(x=formatter(x)) vals = self.values - if isinstance(vals, Index): - vals = vals._values - elif isinstance(vals, ABCSparseArray): - vals = vals.values is_float_type = lib.map_infer(vals, is_float) & notna(vals) leading_space = self.leading_space @@ -1185,29 +1198,6 @@ def _format_strings(self): return fmt_values.tolist() -class ExtensionArrayFormatter(GenericArrayFormatter): - def _format_strings(self): - values = self.values - if isinstance(values, (ABCIndexClass, ABCSeries)): - values = values._values - - formatter = values._formatter(boxed=True) - - if is_categorical_dtype(values.dtype): - # Categorical is special for now, so that we can preserve tzinfo - array = values.get_values() - else: - array = np.asarray(values) - - fmt_values = format_array(array, - formatter, - float_format=self.float_format, - na_rep=self.na_rep, digits=self.digits, - space=self.space, justify=self.justify, - leading_space=self.leading_space) - return fmt_values - - def format_percentiles(percentiles): """ Outputs rounded and formatted percentiles. @@ -1330,21 +1320,6 @@ def _get_format_datetime64_from_values(values, date_format): return date_format -class Datetime64TZFormatter(Datetime64Formatter): - - def _format_strings(self): - """ we by definition have a TZ """ - - values = self.values.astype(object) - is_dates_only = _is_dates_only(values) - formatter = (self.formatter or - _get_format_datetime64(is_dates_only, - date_format=self.date_format)) - fmt_values = [formatter(x) for x in values] - - return fmt_values - - class Timedelta64Formatter(GenericArrayFormatter): def __init__(self, values, nat_rep='NaT', box=False, **kwargs): diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 96cf70483d4e7..c68b8f9b1c47c 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -1009,9 +1009,9 @@ def test_astype_str(self): with option_context('display.max_columns', 20): result = str(self.tzframe) - assert ('0 2013-01-01 2013-01-01 00:00:00-05:00 ' + assert ('0 2013-01-01 2013-01-01 00:00:00-05:00 ' '2013-01-01 00:00:00+01:00') in result - assert ('1 2013-01-02 ' - 'NaT NaT') in result - assert ('2 2013-01-03 2013-01-03 00:00:00-05:00 ' + assert ('1 2013-01-02 ' + 'NaT NaT') in result + assert ('2 2013-01-03 2013-01-03 00:00:00-05:00 ' '2013-01-03 00:00:00+01:00') in result diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 24dba8cb964cc..612a3df200046 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -504,12 +504,12 @@ def test_repr_categorical_dates_periods(self): tz='US/Eastern') p = period_range('2011-01', freq='M', periods=5) df = DataFrame({'dt': dt, 'p': p}) - exp = """ dt p -0 2011-01-01 09:00:00-05:00 2011-01 -1 2011-01-01 10:00:00-05:00 2011-02 -2 2011-01-01 11:00:00-05:00 2011-03 -3 2011-01-01 12:00:00-05:00 2011-04 -4 2011-01-01 13:00:00-05:00 2011-05""" + exp = """ dt p +0 2011-01-01 09:00:00-05:00 2011-01 +1 2011-01-01 10:00:00-05:00 2011-02 +2 2011-01-01 11:00:00-05:00 2011-03 +3 2011-01-01 12:00:00-05:00 2011-04 +4 2011-01-01 13:00:00-05:00 2011-05""" assert repr(df) == exp diff --git a/pandas/tests/indexes/datetimes/test_formats.py b/pandas/tests/indexes/datetimes/test_formats.py index 37bd68176fe9e..8d0bed2908bc3 100644 --- a/pandas/tests/indexes/datetimes/test_formats.py +++ b/pandas/tests/indexes/datetimes/test_formats.py @@ -127,14 +127,14 @@ def test_dti_representation_to_series(self): "2 2011-01-03\n" "dtype: datetime64[ns]") - exp5 = ("0 2011-01-01 09:00:00+09:00\n" - "1 2011-01-01 10:00:00+09:00\n" - "2 2011-01-01 11:00:00+09:00\n" + exp5 = ("0 2011-01-01 09:00:00+09:00\n" + "1 2011-01-01 10:00:00+09:00\n" + "2 2011-01-01 11:00:00+09:00\n" "dtype: datetime64[ns, Asia/Tokyo]") - exp6 = ("0 2011-01-01 09:00:00-05:00\n" - "1 2011-01-01 10:00:00-05:00\n" - "2 NaT\n" + exp6 = ("0 2011-01-01 09:00:00-05:00\n" + "1 2011-01-01 10:00:00-05:00\n" + "2 NaT\n" "dtype: datetime64[ns, US/Eastern]") exp7 = ("0 2011-01-01 09:00:00\n" diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index edb7c2136825d..2af31711dda93 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -872,12 +872,12 @@ def test_datetimelike_frame(self): df = pd.DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) with option_context('display.max_rows', 5): - expected = (' dt x\n' - '0 2011-01-01 00:00:00-05:00 1\n' - '1 2011-01-01 00:00:00-05:00 2\n' - '.. ... ..\n' - '8 NaT 9\n' - '9 NaT 10\n\n' + expected = (' dt x\n' + '0 2011-01-01 00:00:00-05:00 1\n' + '1 2011-01-01 00:00:00-05:00 2\n' + '.. ... ..\n' + '8 NaT 9\n' + '9 NaT 10\n\n' '[10 rows x 2 columns]') assert repr(df) == expected @@ -885,12 +885,12 @@ def test_datetimelike_frame(self): df = pd.DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) with option_context('display.max_rows', 5): - expected = (' dt x\n' - '0 NaT 1\n' - '1 NaT 2\n' - '.. ... ..\n' - '8 2011-01-01 00:00:00-05:00 9\n' - '9 2011-01-01 00:00:00-05:00 10\n\n' + expected = (' dt x\n' + '0 NaT 1\n' + '1 NaT 2\n' + '.. ... ..\n' + '8 2011-01-01 00:00:00-05:00 9\n' + '9 2011-01-01 00:00:00-05:00 10\n\n' '[10 rows x 2 columns]') assert repr(df) == expected diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 92b6fb0610979..7c453f4571f24 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -306,11 +306,11 @@ def test_categorical_series_repr_datetime(self): idx = date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') s = Series(Categorical(idx)) - exp = """0 2011-01-01 09:00:00-05:00 -1 2011-01-01 10:00:00-05:00 -2 2011-01-01 11:00:00-05:00 -3 2011-01-01 12:00:00-05:00 -4 2011-01-01 13:00:00-05:00 + exp = """0 2011-01-01 09:00:00-05:00 +1 2011-01-01 10:00:00-05:00 +2 2011-01-01 11:00:00-05:00 +3 2011-01-01 12:00:00-05:00 +4 2011-01-01 13:00:00-05:00 dtype: category Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, @@ -335,11 +335,11 @@ def test_categorical_series_repr_datetime_ordered(self): idx = date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') s = Series(Categorical(idx, ordered=True)) - exp = """0 2011-01-01 09:00:00-05:00 -1 2011-01-01 10:00:00-05:00 -2 2011-01-01 11:00:00-05:00 -3 2011-01-01 12:00:00-05:00 -4 2011-01-01 13:00:00-05:00 + exp = """0 2011-01-01 09:00:00-05:00 +1 2011-01-01 10:00:00-05:00 +2 2011-01-01 11:00:00-05:00 +3 2011-01-01 12:00:00-05:00 +4 2011-01-01 13:00:00-05:00 dtype: category Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < @@ -402,11 +402,11 @@ def test_categorical_series_repr_period_ordered(self): def test_categorical_series_repr_timedelta(self): idx = timedelta_range('1 days', periods=5) s = Series(Categorical(idx)) - exp = """0 1 days -1 2 days -2 3 days -3 4 days -4 5 days + exp = """0 1 days +1 2 days +2 3 days +3 4 days +4 5 days dtype: category Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" @@ -414,16 +414,16 @@ def test_categorical_series_repr_timedelta(self): idx = timedelta_range('1 hours', periods=10) s = Series(Categorical(idx)) - exp = """0 0 days 01:00:00 -1 1 days 01:00:00 -2 2 days 01:00:00 -3 3 days 01:00:00 -4 4 days 01:00:00 -5 5 days 01:00:00 -6 6 days 01:00:00 -7 7 days 01:00:00 -8 8 days 01:00:00 -9 9 days 01:00:00 + exp = """0 0 days 01:00:00 +1 1 days 01:00:00 +2 2 days 01:00:00 +3 3 days 01:00:00 +4 4 days 01:00:00 +5 5 days 01:00:00 +6 6 days 01:00:00 +7 7 days 01:00:00 +8 8 days 01:00:00 +9 9 days 01:00:00 dtype: category Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, @@ -434,11 +434,11 @@ def test_categorical_series_repr_timedelta(self): def test_categorical_series_repr_timedelta_ordered(self): idx = timedelta_range('1 days', periods=5) s = Series(Categorical(idx, ordered=True)) - exp = """0 1 days -1 2 days -2 3 days -3 4 days -4 5 days + exp = """0 1 days +1 2 days +2 3 days +3 4 days +4 5 days dtype: category Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa @@ -446,16 +446,16 @@ def test_categorical_series_repr_timedelta_ordered(self): idx = timedelta_range('1 hours', periods=10) s = Series(Categorical(idx, ordered=True)) - exp = """0 0 days 01:00:00 -1 1 days 01:00:00 -2 2 days 01:00:00 -3 3 days 01:00:00 -4 4 days 01:00:00 -5 5 days 01:00:00 -6 6 days 01:00:00 -7 7 days 01:00:00 -8 8 days 01:00:00 -9 9 days 01:00:00 + exp = """0 0 days 01:00:00 +1 1 days 01:00:00 +2 2 days 01:00:00 +3 3 days 01:00:00 +4 4 days 01:00:00 +5 5 days 01:00:00 +6 6 days 01:00:00 +7 7 days 01:00:00 +8 8 days 01:00:00 +9 9 days 01:00:00 dtype: category Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < 3 days 01:00:00 ... 6 days 01:00:00 < 7 days 01:00:00 < From 96546ae1b0b7d9aecf4d977553de52aa6d04a00d Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 20 Jun 2019 15:13:20 +0100 Subject: [PATCH 2/3] remove DeprecationWarning --- pandas/core/arrays/base.py | 2 +- pandas/core/internals/blocks.py | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 0a9f47705990a..536bd9804b627 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -857,7 +857,7 @@ def __repr__(self): def _formatter( self, boxed: bool = False, - ) -> Callable[[Any], Optional[str]]: + ) -> Optional[Callable[[Any], Optional[str]]]: """Formatting function for scalar values. This is used in the default '__repr__'. The returned formatting diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4cc6c86417b3b..12a9930a6eeb4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1768,16 +1768,8 @@ def _slice(self, slicer): return self.values[slicer] def formatting_values(self): - # Deprecating the ability to override _formatting_values. - # Do the warning here, it's only user in pandas, since we - # have to check if the subclass overrode it. fv = getattr(type(self.values), '_formatting_values', None) if fv and fv != ExtensionArray._formatting_values: - msg = ( - "'ExtensionArray._formatting_values' is deprecated. " - "Specify 'ExtensionArray._formatter' instead." - ) - warnings.warn(msg, DeprecationWarning, stacklevel=10) return self.values._formatting_values() return self.values From 1ef196918d40a5205c74b44ef2c4a966d6c69e92 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 20 Jun 2019 15:18:53 +0100 Subject: [PATCH 3/3] remove test_formatting_values_deprecated --- pandas/tests/extension/decimal/test_decimal.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 94c0b61c6382a..678613e9550b9 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -384,15 +384,3 @@ def test_divmod_array(reverse, expected_div, expected_mod): tm.assert_extension_array_equal(div, expected_div) tm.assert_extension_array_equal(mod, expected_mod) - - -def test_formatting_values_deprecated(): - class DecimalArray2(DecimalArray): - def _formatting_values(self): - return np.array(self) - - ser = pd.Series(DecimalArray2([decimal.Decimal('1.0')])) - - with tm.assert_produces_warning(DeprecationWarning, - check_stacklevel=False): - repr(ser)