From 8466f96911c730be1af08be5aa57de41ad041460 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 16 Jan 2021 01:09:20 +0000 Subject: [PATCH] Backport PR #39194: REGR: fillna on datetime64[ns, UTC] column hits RecursionError --- doc/source/whatsnew/v1.2.1.rst | 1 + pandas/core/internals/blocks.py | 39 +++++++++++++++++----- pandas/core/internals/managers.py | 2 +- pandas/tests/series/methods/test_fillna.py | 11 +++++- 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 918e85ffa4af1..448260148c83d 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -27,6 +27,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.apply` with ``axis=1`` using str accessor in apply function (:issue:`38979`) - Fixed regression in :meth:`DataFrame.replace` raising ``ValueError`` when :class:`DataFrame` has dtype ``bytes`` (:issue:`38900`) - Fixed regression in :meth:`DataFrameGroupBy.diff` raising for ``int8`` and ``int16`` columns (:issue:`39050`) +- Fixed regression in :meth:`Series.fillna` that raised ``RecursionError`` with ``datetime64[ns, UTC]`` dtype (:issue:`38851`) - Fixed regression that raised ``AttributeError`` with PyArrow versions [0.16.0, 1.0.0) (:issue:`38801`) - Fixed regression in :meth:`DataFrame.groupby` when aggregating an :class:`ExtensionDType` that could fail for non-numeric values (:issue:`38980`) - Fixed regression in :meth:`DataFrame.loc.__setitem__` raising ``KeyError`` with :class:`MultiIndex` and list-like columns indexer enlarging :class:`DataFrame` (:issue:`39147`) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b8de2ea0892db..32aade97c8736 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2148,7 +2148,13 @@ def _can_hold_element(self, element: Any) -> bool: class DatetimeLikeBlockMixin(Block): """Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock.""" - _can_hold_na = True + @property + def _holder(self): + return DatetimeArray + + @property + def fill_value(self): + return np.datetime64("NaT", "ns") def get_values(self, dtype=None): """ @@ -2216,8 +2222,10 @@ def to_native_types(self, na_rep="NaT", **kwargs): class DatetimeBlock(DatetimeLikeBlockMixin): __slots__ = () is_datetime = True - _holder = DatetimeArray - fill_value = np.datetime64("NaT", "ns") + + @property + def _can_hold_na(self): + return True def _maybe_coerce_values(self, values): """ @@ -2308,17 +2316,17 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): is_extension = True internal_values = Block.internal_values - - _holder = DatetimeBlock._holder _can_hold_element = DatetimeBlock._can_hold_element to_native_types = DatetimeBlock.to_native_types diff = DatetimeBlock.diff - fillna = DatetimeBlock.fillna # i.e. Block.fillna - fill_value = DatetimeBlock.fill_value - _can_hold_na = DatetimeBlock._can_hold_na + fill_value = np.datetime64("NaT", "ns") array_values = ExtensionBlock.array_values + @property + def _holder(self): + return DatetimeArray + def _maybe_coerce_values(self, values): """ Input validation for values passed to __init__. Ensure that @@ -2383,6 +2391,17 @@ def external_values(self): # return an object-dtype ndarray of Timestamps. return np.asarray(self.values.astype("datetime64[ns]", copy=False)) + def fillna(self, value, limit=None, inplace=False, downcast=None): + # We support filling a DatetimeTZ with a `value` whose timezone + # is different by coercing to object. + if self._can_hold_element(value): + return super().fillna(value, limit, inplace, downcast) + + # different timezones, or a non-tz + return self.astype(object).fillna( + value, limit=limit, inplace=inplace, downcast=downcast + ) + def quantile(self, qs, interpolation="linear", axis=0): naive = self.values.view("M8[ns]") @@ -2419,9 +2438,11 @@ def _check_ndim(self, values, ndim): return ndim -class TimeDeltaBlock(DatetimeLikeBlockMixin): +class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): __slots__ = () is_timedelta = True + _can_hold_na = True + is_numeric = False fill_value = np.timedelta64("NaT", "ns") def _maybe_coerce_values(self, values): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 93ab207d8ce12..9aebacd740526 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1916,7 +1916,7 @@ def _consolidate(blocks): merged_blocks = _merge_blocks( list(group_blocks), dtype=dtype, can_consolidate=_can_consolidate ) - new_blocks.extend(merged_blocks) + new_blocks = extend_blocks(merged_blocks, new_blocks) return new_blocks diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index aaa58cdb390f7..c2219e9fd45a6 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone import numpy as np import pytest @@ -13,6 +13,7 @@ Series, Timedelta, Timestamp, + date_range, isna, ) import pandas._testing as tm @@ -711,6 +712,14 @@ def test_fillna_method_and_limit_invalid(self): with pytest.raises(ValueError, match=msg): ser.fillna(1, limit=limit, method=method) + def test_fillna_datetime64_with_timezone_tzinfo(self): + # https://github.com/pandas-dev/pandas/issues/38851 + s = Series(date_range("2020", periods=3, tz="UTC")) + expected = s.astype(object) + s[1] = NaT + result = s.fillna(datetime(2020, 1, 2, tzinfo=timezone.utc)) + tm.assert_series_equal(result, expected) + class TestFillnaPad: def test_fillna_bug(self):