From 53edcccd1903389178eacb4ce9ae45f7c6b0c0df Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 2 Jun 2023 13:54:07 -0700 Subject: [PATCH 1/6] BUG: reindex with expansion and non-nanosecond dtype --- doc/source/whatsnew/v2.0.3.rst | 1 + pandas/core/dtypes/cast.py | 6 ++++-- pandas/tests/series/methods/test_reindex.py | 12 ++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.0.3.rst b/doc/source/whatsnew/v2.0.3.rst index 2c63d7d20ed1c..9ce00b9d80faa 100644 --- a/doc/source/whatsnew/v2.0.3.rst +++ b/doc/source/whatsnew/v2.0.3.rst @@ -21,6 +21,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ +- Bug in :func:`Series.reindex` when expanding a non-nanosecond datetime or timedelta :class:`Series` would not fill with ``NaT`` correctly (:issue:`53497`) - Bug in :func:`read_csv` when defining ``dtype`` with ``bool[pyarrow]`` for the ``"c"`` and ``"python"`` engines (:issue:`53390`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 831b368f58225..3ced6f5e9f73e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -580,8 +580,10 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan): # if fill_value is not hashable (required for caching) dtype, fill_value = _maybe_promote(dtype, fill_value) - if dtype == _dtype_obj and orig is not None: - # GH#51592 restore our potentially non-canonical fill_value + if (dtype == _dtype_obj and orig is not None) or ( + np.isnat(orig) and np.datetime_data(orig)[0] != "ns" + ): + # GH#51592,53497 restore our potentially non-canonical fill_value fill_value = orig return dtype, fill_value diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 04a15b4e95e0a..33e7cdcfd04c1 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -12,6 +12,7 @@ NaT, Period, PeriodIndex, + RangeIndex, Series, Timedelta, Timestamp, @@ -422,3 +423,14 @@ def test_reindexing_with_float64_NA_log(): result_log = np.log(s_reindex) expected_log = Series([0, np.NaN, np.NaN], dtype=Float64Dtype()) tm.assert_series_equal(result_log, expected_log) + + +@pytest.mark.parametrize("dtype", ["timedelta64", "datetime64"]) +def test_reindex_expand_nonnano_nat(dtype): + # GH 53497 + ser = Series(np.array([1], dtype=f"{dtype}[s]")) + result = ser.reindex(RangeIndex(2)) + expected = Series( + np.array([1, getattr(np, dtype)("nat", "s")], dtype=f"{dtype}[s]") + ) + tm.assert_series_equal(result, expected) From f084b1b39af58758c258eae7319b4c6a15e315e2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 2 Jun 2023 14:52:12 -0700 Subject: [PATCH 2/6] Restrict to timelike types --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3ced6f5e9f73e..0480e002c2f5c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -581,7 +581,7 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan): dtype, fill_value = _maybe_promote(dtype, fill_value) if (dtype == _dtype_obj and orig is not None) or ( - np.isnat(orig) and np.datetime_data(orig)[0] != "ns" + dtype.kind in "Mm" and np.isnat(orig) and np.datetime_data(orig)[0] != "ns" ): # GH#51592,53497 restore our potentially non-canonical fill_value fill_value = orig From f362c2035f9b8cef8904709a12868c46ba94ac94 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 2 Jun 2023 16:00:45 -0700 Subject: [PATCH 3/6] Check earlier --- pandas/core/dtypes/cast.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 0480e002c2f5c..8a8a66faa9fca 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -562,9 +562,14 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan): If fill_value is a non-scalar and dtype is not object. """ orig = fill_value + orig_is_nat = False if checknull(fill_value): # https://github.com/pandas-dev/pandas/pull/39692#issuecomment-1441051740 # avoid cache misses with NaN/NaT values that are not singletons + try: + orig_is_nat = np.isnat(fill_value) + except TypeError: + pass fill_value = _canonical_nans.get(type(fill_value), fill_value) # for performance, we are using a cached version of the actual implementation @@ -581,7 +586,7 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan): dtype, fill_value = _maybe_promote(dtype, fill_value) if (dtype == _dtype_obj and orig is not None) or ( - dtype.kind in "Mm" and np.isnat(orig) and np.datetime_data(orig)[0] != "ns" + orig_is_nat and np.datetime_data(orig)[0] != "ns" ): # GH#51592,53497 restore our potentially non-canonical fill_value fill_value = orig From b4f2bec4c43bfe5832142779a8d09936185549a9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 5 Jun 2023 11:26:36 -0700 Subject: [PATCH 4/6] handle NA --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 8a8a66faa9fca..eaaee4bc1e4b2 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -586,7 +586,7 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan): dtype, fill_value = _maybe_promote(dtype, fill_value) if (dtype == _dtype_obj and orig is not None) or ( - orig_is_nat and np.datetime_data(orig)[0] != "ns" + orig_is_nat is True and np.datetime_data(orig)[0] != "ns" ): # GH#51592,53497 restore our potentially non-canonical fill_value fill_value = orig From 5b7ed241c117943a0d0268ca1f8b07dd4351e4fd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 5 Jun 2023 15:04:09 -0700 Subject: [PATCH 5/6] handle NA --- pandas/core/dtypes/cast.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index eaaee4bc1e4b2..05d6ce3ce0af0 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -570,6 +570,9 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan): orig_is_nat = np.isnat(fill_value) except TypeError: pass + if orig_is_nat is NA: + orig_is_nat = False + fill_value = _canonical_nans.get(type(fill_value), fill_value) # for performance, we are using a cached version of the actual implementation @@ -586,7 +589,7 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan): dtype, fill_value = _maybe_promote(dtype, fill_value) if (dtype == _dtype_obj and orig is not None) or ( - orig_is_nat is True and np.datetime_data(orig)[0] != "ns" + orig_is_nat and np.datetime_data(orig)[0] != "ns" ): # GH#51592,53497 restore our potentially non-canonical fill_value fill_value = orig From 2d42813f42ec7e983becd11a128c7b9a4aaa935b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 5 Jun 2023 15:50:31 -0700 Subject: [PATCH 6/6] Better check --- pandas/core/dtypes/cast.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 05d6ce3ce0af0..be23911165138 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -566,12 +566,11 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan): if checknull(fill_value): # https://github.com/pandas-dev/pandas/pull/39692#issuecomment-1441051740 # avoid cache misses with NaN/NaT values that are not singletons - try: - orig_is_nat = np.isnat(fill_value) - except TypeError: - pass - if orig_is_nat is NA: - orig_is_nat = False + if fill_value is not NA: + try: + orig_is_nat = np.isnat(fill_value) + except TypeError: + pass fill_value = _canonical_nans.get(type(fill_value), fill_value)