Skip to content

REGR: fillna on datetime64[ns, UTC] column hits RecursionError #39194

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Fixed regressions
- Fixed regression in :meth:`read_csv` and other read functions where the encoding error policy (``errors``) did not default to ``"replace"`` when no encoding was specified (:issue:`38989`)
- Fixed regression in :meth:`DataFrame.replace` raising ``ValueError`` when :class:`DataFrame` has dtype ``bytes`` (:issue:`38900`)
- Fixed regression in :meth:`DataFrameGroupBy.diff` raising for ``int8`` and ``int16`` columns (:issue:`39050`)
- Fixed regression in :meth:`Series.fillna` that raised ``RecursionError`` with ``datetime64[ns, UTC]`` dtype (:issue:`38851`)
- Fixed regression that raised ``AttributeError`` with PyArrow versions [0.16.0, 1.0.0) (:issue:`38801`)
- Fixed regression in :meth:`DataFrame.groupby` when aggregating an :class:`ExtensionDtype` that could fail for non-numeric values (:issue:`38980`)
- Fixed regression in :meth:`DataFrame.loc.__setitem__` raising ``KeyError`` with :class:`MultiIndex` and list-like columns indexer enlarging :class:`DataFrame` (:issue:`39147`)
Expand Down
39 changes: 30 additions & 9 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1968,7 +1968,13 @@ class IntBlock(NumericBlock):
class DatetimeLikeBlockMixin(HybridMixin, Block):
"""Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock."""

_can_hold_na = True
@property
def _holder(self):
    """Return the ``DatetimeArray`` class."""
    # NOTE(review): presumably the array type backing this block's values;
    # confirm against how ``_holder`` is consumed elsewhere in blocks.py.
    return DatetimeArray

@property
def fill_value(self):
    """Return a nanosecond-resolution ``NaT`` as a ``np.datetime64`` scalar."""
    # NOTE(review): presumably the placeholder used for missing entries;
    # confirm against callers of ``fill_value``.
    return np.datetime64("NaT", "ns")

def get_values(self, dtype: Optional[Dtype] = None):
"""
Expand Down Expand Up @@ -2052,8 +2058,10 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List["Block"]:
class DatetimeBlock(DatetimeLikeBlockMixin):
__slots__ = ()
is_datetime = True
_holder = DatetimeArray
fill_value = np.datetime64("NaT", "ns")

@property
def _can_hold_na(self):
    """Return ``True`` unconditionally."""
    return True

def _maybe_coerce_values(self, values):
"""
Expand Down Expand Up @@ -2099,18 +2107,18 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
is_extension = True

internal_values = Block.internal_values

_holder = DatetimeBlock._holder
_can_hold_element = DatetimeBlock._can_hold_element
to_native_types = DatetimeBlock.to_native_types
diff = DatetimeBlock.diff
fillna = DatetimeBlock.fillna # i.e. Block.fillna
fill_value = DatetimeBlock.fill_value
_can_hold_na = DatetimeBlock._can_hold_na
fill_value = np.datetime64("NaT", "ns")
where = DatetimeBlock.where

array_values = ExtensionBlock.array_values

@property
def _holder(self):
    """Return the ``DatetimeArray`` class."""
    # NOTE(review): presumably the array type backing this block's values;
    # confirm against how ``_holder`` is consumed elsewhere in blocks.py.
    return DatetimeArray

def _maybe_coerce_values(self, values):
"""
Input validation for values passed to __init__. Ensure that
Expand Down Expand Up @@ -2175,6 +2183,17 @@ def external_values(self):
# return an object-dtype ndarray of Timestamps.
return np.asarray(self.values.astype("datetime64[ns]", copy=False))

def fillna(self, value, limit=None, inplace=False, downcast=None):
    """
    Fill missing values, coercing to object when ``value`` cannot be held.

    If ``self._can_hold_element(value)`` is true, the fill is delegated to
    the parent implementation. Otherwise the block is cast to object dtype
    and the fill is performed on that result.
    """
    can_fill_directly = self._can_hold_element(value)
    if not can_fill_directly:
        # `value` has a different timezone, or is tz-naive: filling a
        # DatetimeTZ block with it is supported by coercing to object.
        as_object = self.astype(object)
        return as_object.fillna(
            value, limit=limit, inplace=inplace, downcast=downcast
        )

    return super().fillna(value, limit, inplace, downcast)

def quantile(self, qs, interpolation="linear", axis=0):
naive = self.values.view("M8[ns]")

Expand Down Expand Up @@ -2211,9 +2230,11 @@ def _check_ndim(self, values, ndim):
return ndim


class TimeDeltaBlock(DatetimeLikeBlockMixin):
class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
__slots__ = ()
is_timedelta = True
_can_hold_na = True
is_numeric = False
fill_value = np.timedelta64("NaT", "ns")

def _maybe_coerce_values(self, values):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1891,7 +1891,7 @@ def _consolidate(blocks):
merged_blocks = _merge_blocks(
list(group_blocks), dtype=dtype, can_consolidate=_can_consolidate
)
new_blocks.extend(merged_blocks)
new_blocks = extend_blocks(merged_blocks, new_blocks)
return new_blocks


Expand Down
11 changes: 10 additions & 1 deletion pandas/tests/series/methods/test_fillna.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone

import numpy as np
import pytest
Expand All @@ -13,6 +13,7 @@
Series,
Timedelta,
Timestamp,
date_range,
isna,
)
import pandas._testing as tm
Expand Down Expand Up @@ -724,6 +725,14 @@ def test_fillna_method_and_limit_invalid(self):
with pytest.raises(ValueError, match=msg):
ser.fillna(1, limit=limit, method=method)

def test_fillna_datetime64_with_timezone_tzinfo(self):
    # https://github.com/pandas-dev/pandas/issues/38851
    # Fill a UTC-aware Series with a stdlib ``datetime`` whose tzinfo is
    # ``timezone.utc`` and compare against the object-dtype original.
    s = Series(date_range("2020", periods=3, tz="UTC"))
    # Capture the expected values (cast to object) *before* inserting NaT.
    expected = s.astype(object)
    s[1] = NaT
    result = s.fillna(datetime(2020, 1, 2, tzinfo=timezone.utc))
    tm.assert_series_equal(result, expected)


class TestFillnaPad:
def test_fillna_bug(self):
Expand Down