Skip to content

Commit 8614088

Browse files
authored
BUG: fillna with mixed-resolution dt64/td64 (#56413)
* BUG: fillna with mixed-resolution dt64/td64 * mypy fixup * troubleshoot docbuild * typo fixup in whatsnew
1 parent a6c0ae4 commit 8614088

File tree

4 files changed

+109
-20
lines changed

4 files changed

+109
-20
lines changed

doc/source/whatsnew/v2.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,7 @@ Datetimelike
537537
- Bug in :meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` always caching :meth:`Index.is_unique` as ``True`` when first value in index is ``NaT`` (:issue:`55755`)
538538
- Bug in :meth:`Index.view` to a datetime64 dtype with non-supported resolution incorrectly raising (:issue:`55710`)
539539
- Bug in :meth:`Series.dt.round` with non-nanosecond resolution and ``NaT`` entries incorrectly raising ``OverflowError`` (:issue:`56158`)
540+
- Bug in :meth:`Series.fillna` with non-nanosecond resolution dtypes and higher-resolution vector values returning incorrect (internally-corrupted) results (:issue:`56410`)
540541
- Bug in :meth:`Tick.delta` with very large ticks raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
541542
- Bug in :meth:`Timestamp.unit` being inferred incorrectly from an ISO8601 format string with minute or hour resolution and a timezone offset (:issue:`56208`)
542543
- Bug in ``.astype`` converting from a higher-resolution ``datetime64`` dtype to a lower-resolution ``datetime64`` dtype (e.g. ``datetime64[us]->datetime64[ms]``) silently overflowing with values near the lower implementation bound (:issue:`55979`)
@@ -550,7 +551,6 @@ Datetimelike
550551
- Bug in parsing datetime strings with nanosecond resolution with non-ISO8601 formats incorrectly truncating sub-microsecond components (:issue:`56051`)
551552
- Bug in parsing datetime strings with sub-second resolution and trailing zeros incorrectly inferring second or millisecond resolution (:issue:`55737`)
552553
- Bug in the results of :func:`to_datetime` with a floating-dtype argument with ``unit`` not matching the pointwise results of :class:`Timestamp` (:issue:`56037`)
553-
-
554554

555555
Timedelta
556556
^^^^^^^^^

pandas/core/arrays/_mixins.py

+6
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,12 @@ def _where(self: Self, mask: npt.NDArray[np.bool_], value) -> Self:
430430
value = self._validate_setitem_value(value)
431431

432432
res_values = np.where(mask, self._ndarray, value)
433+
if res_values.dtype != self._ndarray.dtype:
434+
raise AssertionError(
435+
# GH#56410
436+
"Something has gone wrong, please report a bug at "
437+
"github.com/pandas-dev/pandas/"
438+
)
433439
return self._from_backing_data(res_values)
434440

435441
# ------------------------------------------------------------------------

pandas/core/arrays/datetimelike.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,9 @@ def _validation_error_message(self, value, allow_listlike: bool = False) -> str:
646646

647647
def _validate_listlike(self, value, allow_object: bool = False):
648648
if isinstance(value, type(self)):
649+
if self.dtype.kind in "mM" and not allow_object:
650+
# error: "DatetimeLikeArrayMixin" has no attribute "as_unit"
651+
value = value.as_unit(self.unit, round_ok=False) # type: ignore[attr-defined]
649652
return value
650653

651654
if isinstance(value, list) and len(value) == 0:
@@ -694,6 +697,9 @@ def _validate_listlike(self, value, allow_object: bool = False):
694697
msg = self._validation_error_message(value, True)
695698
raise TypeError(msg)
696699

700+
if self.dtype.kind in "mM" and not allow_object:
701+
# error: "DatetimeLikeArrayMixin" has no attribute "as_unit"
702+
value = value.as_unit(self.unit, round_ok=False) # type: ignore[attr-defined]
697703
return value
698704

699705
def _validate_setitem_value(self, value):
@@ -2138,12 +2144,12 @@ def unit(self) -> str:
21382144
# "ExtensionDtype"; expected "Union[DatetimeTZDtype, dtype[Any]]"
21392145
return dtype_to_unit(self.dtype) # type: ignore[arg-type]
21402146

2141-
def as_unit(self, unit: str) -> Self:
2147+
def as_unit(self, unit: str, round_ok: bool = True) -> Self:
21422148
if unit not in ["s", "ms", "us", "ns"]:
21432149
raise ValueError("Supported units are 's', 'ms', 'us', 'ns'")
21442150

21452151
dtype = np.dtype(f"{self.dtype.kind}8[{unit}]")
2146-
new_values = astype_overflowsafe(self._ndarray, dtype, round_ok=True)
2152+
new_values = astype_overflowsafe(self._ndarray, dtype, round_ok=round_ok)
21472153

21482154
if isinstance(self.dtype, np.dtype):
21492155
new_dtype = new_values.dtype

pandas/tests/series/methods/test_fillna.py

+94-17
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
Timestamp,
2020
date_range,
2121
isna,
22+
timedelta_range,
2223
)
2324
import pandas._testing as tm
2425
from pandas.core.arrays import period_array
@@ -239,15 +240,16 @@ def test_fillna_downcast_infer_objects_to_numeric(self):
239240
expected = Series([0, 1, 2.5, 4, 4], dtype=np.float64)
240241
tm.assert_series_equal(res, expected)
241242

242-
def test_timedelta_fillna(self, frame_or_series):
243+
def test_timedelta_fillna(self, frame_or_series, unit):
243244
# GH#3371
244245
ser = Series(
245246
[
246247
Timestamp("20130101"),
247248
Timestamp("20130101"),
248249
Timestamp("20130102"),
249250
Timestamp("20130103 9:01:01"),
250-
]
251+
],
252+
dtype=f"M8[{unit}]",
251253
)
252254
td = ser.diff()
253255
obj = frame_or_series(td).copy()
@@ -260,7 +262,8 @@ def test_timedelta_fillna(self, frame_or_series):
260262
timedelta(0),
261263
timedelta(1),
262264
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
263-
]
265+
],
266+
dtype=f"m8[{unit}]",
264267
)
265268
expected = frame_or_series(expected)
266269
tm.assert_equal(result, expected)
@@ -279,7 +282,8 @@ def test_timedelta_fillna(self, frame_or_series):
279282
timedelta(0),
280283
timedelta(1),
281284
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
282-
]
285+
],
286+
dtype=f"m8[{unit}]",
283287
)
284288
expected = frame_or_series(expected)
285289
tm.assert_equal(result, expected)
@@ -291,7 +295,8 @@ def test_timedelta_fillna(self, frame_or_series):
291295
timedelta(0),
292296
timedelta(1),
293297
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
294-
]
298+
],
299+
dtype=f"m8[{unit}]",
295300
)
296301
expected = frame_or_series(expected)
297302
tm.assert_equal(result, expected)
@@ -303,7 +308,8 @@ def test_timedelta_fillna(self, frame_or_series):
303308
timedelta(0),
304309
timedelta(1),
305310
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
306-
]
311+
],
312+
dtype=f"m8[{unit}]",
307313
)
308314
expected = frame_or_series(expected)
309315
tm.assert_equal(result, expected)
@@ -316,7 +322,7 @@ def test_timedelta_fillna(self, frame_or_series):
316322
timedelta(1),
317323
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
318324
],
319-
dtype="m8[ns]",
325+
dtype=f"m8[{unit}]",
320326
)
321327
expected = frame_or_series(expected)
322328
tm.assert_equal(result, expected)
@@ -375,6 +381,72 @@ def test_datetime64_fillna(self):
375381
)
376382
tm.assert_series_equal(result, expected)
377383

384+
@pytest.mark.parametrize(
385+
"scalar",
386+
[
387+
False,
388+
pytest.param(
389+
True,
390+
marks=pytest.mark.xfail(
391+
reason="GH#56410 scalar case not yet addressed"
392+
),
393+
),
394+
],
395+
)
396+
@pytest.mark.parametrize("tz", [None, "UTC"])
397+
def test_datetime64_fillna_mismatched_reso_no_rounding(self, tz, scalar):
398+
# GH#56410
399+
dti = date_range("2016-01-01", periods=3, unit="s", tz=tz)
400+
item = Timestamp("2016-02-03 04:05:06.789", tz=tz)
401+
vec = date_range(item, periods=3, unit="ms")
402+
403+
exp_dtype = "M8[ms]" if tz is None else "M8[ms, UTC]"
404+
expected = Series([item, dti[1], dti[2]], dtype=exp_dtype)
405+
406+
ser = Series(dti)
407+
ser[0] = NaT
408+
ser2 = ser.copy()
409+
410+
res = ser.fillna(item)
411+
res2 = ser2.fillna(Series(vec))
412+
413+
if scalar:
414+
tm.assert_series_equal(res, expected)
415+
else:
416+
tm.assert_series_equal(res2, expected)
417+
418+
@pytest.mark.parametrize(
419+
"scalar",
420+
[
421+
False,
422+
pytest.param(
423+
True,
424+
marks=pytest.mark.xfail(
425+
reason="GH#56410 scalar case not yet addressed"
426+
),
427+
),
428+
],
429+
)
430+
def test_timedelta64_fillna_mismatched_reso_no_rounding(self, scalar):
431+
# GH#56410
432+
tdi = date_range("2016-01-01", periods=3, unit="s") - Timestamp("1970-01-01")
433+
item = Timestamp("2016-02-03 04:05:06.789") - Timestamp("1970-01-01")
434+
vec = timedelta_range(item, periods=3, unit="ms")
435+
436+
expected = Series([item, tdi[1], tdi[2]], dtype="m8[ms]")
437+
438+
ser = Series(tdi)
439+
ser[0] = NaT
440+
ser2 = ser.copy()
441+
442+
res = ser.fillna(item)
443+
res2 = ser2.fillna(Series(vec))
444+
445+
if scalar:
446+
tm.assert_series_equal(res, expected)
447+
else:
448+
tm.assert_series_equal(res2, expected)
449+
378450
def test_datetime64_fillna_backfill(self):
379451
# GH#6587
380452
# make sure that we are treating as integer when filling
@@ -392,15 +464,16 @@ def test_datetime64_fillna_backfill(self):
392464
tm.assert_series_equal(result, expected)
393465

394466
@pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"])
395-
def test_datetime64_tz_fillna(self, tz):
467+
def test_datetime64_tz_fillna(self, tz, unit):
396468
# DatetimeLikeBlock
397469
ser = Series(
398470
[
399471
Timestamp("2011-01-01 10:00"),
400472
NaT,
401473
Timestamp("2011-01-03 10:00"),
402474
NaT,
403-
]
475+
],
476+
dtype=f"M8[{unit}]",
404477
)
405478
null_loc = Series([False, True, False, True])
406479

@@ -411,7 +484,8 @@ def test_datetime64_tz_fillna(self, tz):
411484
Timestamp("2011-01-02 10:00"),
412485
Timestamp("2011-01-03 10:00"),
413486
Timestamp("2011-01-02 10:00"),
414-
]
487+
],
488+
dtype=f"M8[{unit}]",
415489
)
416490
tm.assert_series_equal(expected, result)
417491
# check s is not changed
@@ -468,15 +542,18 @@ def test_datetime64_tz_fillna(self, tz):
468542
Timestamp("2011-01-02 10:00"),
469543
Timestamp("2011-01-03 10:00"),
470544
Timestamp("2011-01-04 10:00"),
471-
]
545+
],
546+
dtype=f"M8[{unit}]",
472547
)
473548
tm.assert_series_equal(expected, result)
474549
tm.assert_series_equal(isna(ser), null_loc)
475550

476551
# DatetimeTZBlock
477-
idx = DatetimeIndex(["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz=tz)
552+
idx = DatetimeIndex(
553+
["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz=tz
554+
).as_unit(unit)
478555
ser = Series(idx)
479-
assert ser.dtype == f"datetime64[ns, {tz}]"
556+
assert ser.dtype == f"datetime64[{unit}, {tz}]"
480557
tm.assert_series_equal(isna(ser), null_loc)
481558

482559
result = ser.fillna(Timestamp("2011-01-02 10:00"))
@@ -500,7 +577,7 @@ def test_datetime64_tz_fillna(self, tz):
500577
"2011-01-02 10:00",
501578
],
502579
tz=tz,
503-
)
580+
).as_unit(unit)
504581
expected = Series(idx)
505582
tm.assert_series_equal(expected, result)
506583
tm.assert_series_equal(isna(ser), null_loc)
@@ -514,7 +591,7 @@ def test_datetime64_tz_fillna(self, tz):
514591
"2011-01-02 10:00",
515592
],
516593
tz=tz,
517-
)
594+
).as_unit(unit)
518595
expected = Series(idx)
519596
tm.assert_series_equal(expected, result)
520597
tm.assert_series_equal(isna(ser), null_loc)
@@ -562,7 +639,7 @@ def test_datetime64_tz_fillna(self, tz):
562639
Timestamp("2011-01-03 10:00", tz=tz),
563640
Timestamp("2011-01-04 10:00", tz=tz),
564641
]
565-
)
642+
).dt.as_unit(unit)
566643
tm.assert_series_equal(expected, result)
567644
tm.assert_series_equal(isna(ser), null_loc)
568645

@@ -589,7 +666,7 @@ def test_datetime64_tz_fillna(self, tz):
589666
Timestamp("2011-01-03 10:00", tz=tz),
590667
Timestamp("2013-01-01", tz="US/Pacific").tz_convert(tz),
591668
]
592-
)
669+
).dt.as_unit(unit)
593670
tm.assert_series_equal(expected, result)
594671
tm.assert_series_equal(isna(ser), null_loc)
595672

0 commit comments

Comments
 (0)