From 3b589b1aa608b514663687699e12a632204fec17 Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Mon, 18 Jul 2022 03:59:07 -0400 Subject: [PATCH 1/8] BUG: PeriodIndex fails to handle NA, rather than putting NaT in its place (#46673) --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/_libs/tslibs/nattype.pyx | 3 ++- pandas/tests/arrays/categorical/test_indexing.py | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 9651269963803..c2038eeb42631 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -906,6 +906,7 @@ Indexing - Bug in :meth:`NDFrame.xs`, :meth:`DataFrame.iterrows`, :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` not always propagating metadata (:issue:`28283`) - Bug in :meth:`DataFrame.sum` min_count changes dtype if input contains NaNs (:issue:`46947`) - Bug in :class:`IntervalTree` that lead to an infinite recursion. (:issue:`46658`) +- Bug in :class:`PeriodIndex` raising ``AttributeError`` when indexing on ``NA``, rather than putting ``NaT`` in its place. (:issue:`46673`) - Missing diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 93687abdf9153..c2bda8d97d1b4 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -28,6 +28,7 @@ from numpy cimport int64_t cnp.import_array() cimport pandas._libs.tslibs.util as util +from pandas._libs.missing cimport C_NA from pandas._libs.tslibs.np_datetime cimport ( get_datetime64_value, get_timedelta64_value, @@ -1217,7 +1218,7 @@ cdef inline bint checknull_with_nat(object val): """ Utility to check if a value is a nat or not. """ - return val is None or util.is_nan(val) or val is c_NaT + return val is None or util.is_nan(val) or val is c_NaT or val is C_NA cdef inline bint is_dt64nat(object val): diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 73ac51c258a94..09a69e65d4b47 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -1,12 +1,16 @@ +import math + import numpy as np import pytest from pandas import ( + NA, Categorical, CategoricalIndex, Index, Interval, IntervalIndex, + NaT, PeriodIndex, Series, Timedelta, @@ -194,6 +198,17 @@ def test_categories_assignments(self): tm.assert_numpy_array_equal(cat.__array__(), exp) tm.assert_index_equal(cat.categories, Index([1, 2, 3])) + @pytest.mark.parametrize( + "null_val", + [None, np.nan, NaT, NA, math.nan, "NaT", "nat", "NAT", "nan", "NaN", "NAN"], + ) + def test_periodindex_on_null_types(self, null_val): + # GH 46673 + result = PeriodIndex(["2022-04-06", "2022-04-07", null_val], freq="D") + expected = PeriodIndex(["2022-04-06", "2022-04-07", "NaT"], dtype="period[D]") + assert type(result[2]) == type(NaT) + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]]) def test_categories_assignments_wrong_length_raises(self, new_categories): cat = Categorical(["a", "b", "c", "a"]) From 8ba96f227891bbfbe1732a340b77200c39b92f6d Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Mon, 18 Jul 2022 14:19:33 -0400 Subject: [PATCH 2/8] BUG: PeriodIndex fails to handle NA, rather than putting NaT in its place (#46673) --- pandas/tests/series/methods/test_astype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 6bdf93c43c986..47d6cad0e1743 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -446,7 +446,7 @@ def test_astype_string_to_extension_dtype_roundtrip( self, data, dtype, request, nullable_string_dtype ): if dtype == "boolean" or ( - dtype in ("period[M]", "datetime64[ns]", "timedelta64[ns]") and NaT in data + dtype in ("datetime64[ns]", "timedelta64[ns]") and NaT in data ): mark = pytest.mark.xfail( reason="TODO StringArray.astype() with missing values #GH40566" From f2c67236ecb8e80c35574169e0bccaba589b8cfe Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Thu, 21 Jul 2022 17:06:03 -0400 Subject: [PATCH 3/8] BUG: Series.astype is unable to handle pd.nan. --- pandas/tests/extension/test_arrow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index ef576692c83b6..0839223aa226b 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1479,7 +1479,7 @@ def test_combine_le(self, data_repeated): def test_combine_add(self, data_repeated, request): pa_dtype = next(data_repeated(1)).dtype.pyarrow_dtype - if pa.types.is_temporal(pa_dtype): + if pa.types.is_temporal(pa_dtype) and pa_dtype != 'duration[ns]': request.node.add_marker( pytest.mark.xfail( raises=TypeError, @@ -1552,7 +1552,7 @@ def test_arith_series_with_scalar( all_arithmetic_operators in ("__add__", "__radd__") and pa.types.is_duration(pa_dtype) or all_arithmetic_operators in ("__sub__", "__rsub__") - and pa.types.is_temporal(pa_dtype) + and pa.types.is_temporal(pa_dtype) and pa_dtype != 'duration[ns]' ) if ( all_arithmetic_operators From 3d9e157a46f0e6668b5a42146da2e21872f8b2a8 Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Fri, 22 Jul 2022 02:16:32 -0400 Subject: [PATCH 4/8] BUG: PeriodIndex fails to handle NA, rather than putting NaT in its place (#46673) --- pandas/tests/extension/test_arrow.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 0839223aa226b..17909dbe0995e 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1479,7 +1479,7 @@ def test_combine_le(self, data_repeated): def test_combine_add(self, data_repeated, request): pa_dtype = next(data_repeated(1)).dtype.pyarrow_dtype - if pa.types.is_temporal(pa_dtype) and pa_dtype != 'duration[ns]': + if pa.types.is_temporal(pa_dtype) and pa_dtype != "duration[ns]": request.node.add_marker( pytest.mark.xfail( raises=TypeError, @@ -1552,7 +1552,7 @@ def test_arith_series_with_scalar( all_arithmetic_operators in ("__add__", "__radd__") and pa.types.is_duration(pa_dtype) or all_arithmetic_operators in ("__sub__", "__rsub__") - and pa.types.is_temporal(pa_dtype) and pa_dtype != 'duration[ns]' + and pa.types.is_temporal(pa_dtype) ) if ( all_arithmetic_operators @@ -1589,7 +1589,7 @@ def test_arith_series_with_scalar( elif arrow_temporal_supported: request.node.add_marker( pytest.mark.xfail( - raises=TypeError, + raises=TypeError if pa_dtype != "duration[ns]" else AssertionError, reason=( f"{all_arithmetic_operators} not supported between" f"pd.NA and {pa_dtype} Python scalar" @@ -1655,7 +1655,7 @@ def test_arith_frame_with_scalar( elif arrow_temporal_supported: request.node.add_marker( pytest.mark.xfail( - raises=TypeError, + raises=TypeError if pa_dtype != "duration[ns]" else AssertionError, reason=( f"{all_arithmetic_operators} not supported between" f"pd.NA and {pa_dtype} Python scalar" @@ -1739,7 +1739,7 @@ def test_arith_series_with_array( elif arrow_temporal_supported: request.node.add_marker( pytest.mark.xfail( - raises=TypeError, + raises=TypeError if pa_dtype != "duration[ns]" else AssertionError, reason=( f"{all_arithmetic_operators} not supported between" f"pd.NA and {pa_dtype} Python scalar" From ab1d2a7725d6baf8c735b2158b3e7ea111679bd0 Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Fri, 22 Jul 2022 16:52:28 -0400 Subject: [PATCH 5/8] BUG: Series.astype is unable to handle pd.nan (#46377) --- pandas/_libs/tslibs/nattype.pyx | 3 +-- pandas/_libs/tslibs/period.pyx | 3 ++- pandas/_libs/tslibs/timedeltas.pyx | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index c2bda8d97d1b4..93687abdf9153 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -28,7 +28,6 @@ from numpy cimport int64_t cnp.import_array() cimport pandas._libs.tslibs.util as util -from pandas._libs.missing cimport C_NA from pandas._libs.tslibs.np_datetime cimport ( get_datetime64_value, get_timedelta64_value, @@ -1218,7 +1217,7 @@ cdef inline bint checknull_with_nat(object val): """ Utility to check if a value is a nat or not. """ - return val is None or util.is_nan(val) or val is c_NaT or val is C_NA + return val is None or util.is_nan(val) or val is c_NaT cdef inline bint is_dt64nat(object val): diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 3332628627739..9b01bbc433b3b 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -43,6 +43,7 @@ from libc.time cimport ( import_datetime() cimport pandas._libs.tslibs.util as util +from pandas._libs.missing cimport C_NA from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_D, @@ -1470,7 +1471,7 @@ cdef inline int64_t _extract_ordinal(object item, str freqstr, freq) except? -1: cdef: int64_t ordinal - if checknull_with_nat(item): + if checknull_with_nat(item) or item is C_NA: ordinal = NPY_NAT elif util.is_integer_object(item): if item == NPY_NAT: diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 39458c10ad35b..0fa4a25df41ba 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -28,7 +28,6 @@ from cpython.datetime cimport ( import_datetime() - cimport pandas._libs.tslibs.util as util from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs.conversion cimport ( From ca6c0fdb166d69579d028a61e5b5005888512a2c Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Fri, 22 Jul 2022 17:10:14 -0400 Subject: [PATCH 6/8] BUG: PeriodIndex fails to handle NA, rather than putting NaT in its place (#46673) --- pandas/_libs/tslibs/period.pyx | 1 + pandas/tests/extension/test_arrow.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 9b01bbc433b3b..e0dc34b12bf5d 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -72,6 +72,7 @@ from pandas._libs.tslibs.timedeltas cimport ( from pandas._libs.tslibs.conversion import DT64NS_DTYPE +from pandas._libs.missing cimport C_NA from pandas._libs.tslibs.dtypes cimport ( FR_ANN, FR_BUS, diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 079db8152c5b3..a2a96da02b2a6 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1587,7 +1587,7 @@ def test_combine_le(self, data_repeated): def test_combine_add(self, data_repeated, request): pa_dtype = next(data_repeated(1)).dtype.pyarrow_dtype - if pa.types.is_temporal(pa_dtype) and pa_dtype != "duration[ns]": + if pa.types.is_temporal(pa_dtype): request.node.add_marker( pytest.mark.xfail( raises=TypeError, @@ -1697,7 +1697,7 @@ def test_arith_series_with_scalar( elif arrow_temporal_supported: request.node.add_marker( pytest.mark.xfail( - raises=TypeError if pa_dtype != "duration[ns]" else AssertionError, + raises=TypeError, reason=( f"{all_arithmetic_operators} not supported between" f"pd.NA and {pa_dtype} Python scalar" @@ -1763,7 +1763,7 @@ def test_arith_frame_with_scalar( elif arrow_temporal_supported: request.node.add_marker( pytest.mark.xfail( - raises=TypeError if pa_dtype != "duration[ns]" else AssertionError, + raises=TypeError, reason=( f"{all_arithmetic_operators} not supported between" f"pd.NA and {pa_dtype} Python scalar" @@ -1847,7 +1847,7 @@ def test_arith_series_with_array( elif arrow_temporal_supported: request.node.add_marker( pytest.mark.xfail( - raises=TypeError if pa_dtype != "duration[ns]" else AssertionError, + raises=TypeError, reason=( f"{all_arithmetic_operators} not supported between" f"pd.NA and {pa_dtype} Python scalar" From 8e53d067cc19208702b51d8b3a9afcac4c1025ff Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Fri, 22 Jul 2022 17:13:30 -0400 Subject: [PATCH 7/8] BUG: PeriodIndex fails to handle NA, rather than putting NaT in its place (#46673) --- pandas/_libs/tslibs/period.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index e0dc34b12bf5d..9b01bbc433b3b 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -72,7 +72,6 @@ from pandas._libs.tslibs.timedeltas cimport ( from pandas._libs.tslibs.conversion import DT64NS_DTYPE -from pandas._libs.missing cimport C_NA from pandas._libs.tslibs.dtypes cimport ( FR_ANN, FR_BUS, From 3458d3d5ac164fa6958d38bc30e933125e0eb243 Mon Sep 17 00:00:00 2001 From: Kapil <90269125+kapiliyer@users.noreply.github.com> Date: Mon, 25 Jul 2022 14:48:30 -0400 Subject: [PATCH 8/8] BUG: PeriodIndex fails to handle NA, rather than putting NaT in its place (#46673) --- pandas/_libs/tslibs/timedeltas.pyx | 1 + pandas/tests/arrays/categorical/test_indexing.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 0fa4a25df41ba..39458c10ad35b 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -28,6 +28,7 @@ from cpython.datetime cimport ( import_datetime() + cimport pandas._libs.tslibs.util as util from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs.conversion cimport ( diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 09a69e65d4b47..940aa5ffff040 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -206,7 +206,7 @@ def test_periodindex_on_null_types(self, null_val): # GH 46673 result = PeriodIndex(["2022-04-06", "2022-04-07", null_val], freq="D") expected = PeriodIndex(["2022-04-06", "2022-04-07", "NaT"], dtype="period[D]") - assert type(result[2]) == type(NaT) + assert result[2] is NaT tm.assert_index_equal(result, expected) @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]])