From 21e4c7aa39c5eab810cf37b9e4f0b1b36de7dcb3 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 11 Feb 2021 15:05:42 -0800 Subject: [PATCH 1/3] TST: use SetitemCastingEquivalents more --- pandas/core/internals/blocks.py | 5 +- pandas/tests/indexing/test_indexing.py | 47 ------------- pandas/tests/series/indexing/test_setitem.py | 70 +++++++++++++++++++- 3 files changed, 72 insertions(+), 50 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 30e94b99b53c9..2b09c283eed9f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -48,7 +48,7 @@ ) from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype, PandasDtype from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex, ABCPandasArray, ABCSeries -from pandas.core.dtypes.missing import isna +from pandas.core.dtypes.missing import is_valid_na_for_dtype, isna import pandas.core.algorithms as algos from pandas.core.array_algos.putmask import ( @@ -1298,6 +1298,9 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: cond = _extract_bool_array(cond) + if is_valid_na_for_dtype(other, self.dtype) and not self.is_object: + other = self.fill_value + if cond.ravel("K").all(): result = values else: diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index e9ae7bb056041..dcd073681cecf 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -837,53 +837,6 @@ def test_label_indexing_on_nan(self): assert result2 == expected -class TestSeriesNoneCoercion: - EXPECTED_RESULTS = [ - # For numeric series, we should coerce to NaN. - ([1, 2, 3], [np.nan, 2, 3]), - ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]), - # For datetime series, we should coerce to NaT. - ( - [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], - [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)], - ), - # For objects, we should preserve the None value. - (["foo", "bar", "baz"], [None, "bar", "baz"]), - ] - - @pytest.mark.parametrize("start_data,expected_result", EXPECTED_RESULTS) - def test_coercion_with_setitem(self, start_data, expected_result): - start_series = Series(start_data) - start_series[0] = None - - expected_series = Series(expected_result) - tm.assert_series_equal(start_series, expected_series) - - @pytest.mark.parametrize("start_data,expected_result", EXPECTED_RESULTS) - def test_coercion_with_loc_setitem(self, start_data, expected_result): - start_series = Series(start_data) - start_series.loc[0] = None - - expected_series = Series(expected_result) - tm.assert_series_equal(start_series, expected_series) - - @pytest.mark.parametrize("start_data,expected_result", EXPECTED_RESULTS) - def test_coercion_with_setitem_and_series(self, start_data, expected_result): - start_series = Series(start_data) - start_series[start_series == start_series[0]] = None - - expected_series = Series(expected_result) - tm.assert_series_equal(start_series, expected_series) - - @pytest.mark.parametrize("start_data,expected_result", EXPECTED_RESULTS) - def test_coercion_with_loc_and_series(self, start_data, expected_result): - start_series = Series(start_data) - start_series.loc[start_series == start_series[0]] = None - - expected_series = Series(expected_result) - tm.assert_series_equal(start_series, expected_series) - - class TestDataframeNoneCoercion: EXPECTED_SINGLE_ROW_RESULTS = [ # For numeric series, we should coerce to NaN. diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index da78dec4915c9..3a9ec0948b29a 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1,4 +1,4 @@ -from datetime import date +from datetime import date, datetime import numpy as np import pytest @@ -297,7 +297,12 @@ def _check_inplace(self, is_inplace, orig, arr, obj): # We are not (yet) checking whether setting is inplace or not pass elif is_inplace: - assert obj._values is arr + if arr.dtype.kind in ["m", "M"]: + # We may not have the same DTA/TDA, but will have the same + # underlying data + assert arr._data is obj._values._data + else: + assert obj._values is arr else: # otherwise original array should be unchanged tm.assert_equal(arr, orig._values) @@ -635,6 +640,37 @@ def is_inplace(self): return True +class TestSetitemNATimedelta64Dtype(SetitemCastingEquivalents): + # some nat-like values should be cast to timedelta64 when inserting + # into a timedelta64 series. Others should coerce to object + # and retain their dtypes. + + @pytest.fixture + def obj(self): + return Series([0, 1, 2], dtype="m8[ns]") + + @pytest.fixture( + params=[NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")] + ) + def val(self, request): + return request.param + + @pytest.fixture + def is_inplace(self, val): + # cast to object iff val is datetime64("NaT") + return val is NaT or val.dtype.kind == "m" + + @pytest.fixture + def expected(self, obj, val, is_inplace): + dtype = obj.dtype if is_inplace else object + expected = Series([val] + list(obj[1:]), dtype=dtype) + return expected + + @pytest.fixture + def key(self): + return 0 + + class TestSetitemMismatchedTZCastsToObject(SetitemCastingEquivalents): # GH#24024 @pytest.fixture @@ -659,3 +695,33 @@ def expected(self): dtype=object, ) return expected + + +@pytest.mark.parametrize( + "obj,expected", + [ + # For numeric series, we should coerce to NaN. + (Series([1, 2, 3]), Series([np.nan, 2, 3])), + (Series([1.0, 2.0, 3.0]), Series([np.nan, 2.0, 3.0])), + # For datetime series, we should coerce to NaT. + ( + Series([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]), + Series([NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]), + ), + # For objects, we should preserve the None value. + (Series(["foo", "bar", "baz"]), Series([None, "bar", "baz"])), + ], +) +class TestSeriesNoneCoercion(SetitemCastingEquivalents): + @pytest.fixture + def key(self): + return 0 + + @pytest.fixture + def val(self): + return None + + @pytest.fixture + def is_inplace(self, obj): + # This is specific to the 4 cases currently implemented for this class. + return obj.dtype.kind != "i" From f4e9d26edeee3774bf6a2cc78db75e667e3b4fcc Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 11 Feb 2021 15:07:45 -0800 Subject: [PATCH 2/3] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 09e1853429d9f..a17f71ffb6625 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -450,6 +450,7 @@ Other - Bug in constructing a :class:`Series` from a list and a :class:`PandasDtype` (:issue:`39357`) - Bug in :class:`Styler` which caused CSS to duplicate on multiple renders. (:issue:`39395`) - ``inspect.getmembers(Series)`` no longer raises an ``AbstractMethodError`` (:issue:`38782`) +- Bug in :meth:`Series.where` with numeric dtype and ``other = None`` not casting to ``nan`` (:issue:`??`) - :meth:`Index.where` behavior now mirrors :meth:`Index.putmask` behavior, i.e. ``index.where(mask, other)`` matches ``index.putmask(~mask, other)`` (:issue:`39412`) - Bug in :func:`pandas.testing.assert_series_equal`, :func:`pandas.testing.assert_frame_equal`, :func:`pandas.testing.assert_index_equal` and :func:`pandas.testing.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`) - Bug in :class:`Styler` where ``subset`` arg in methods raised an error for some valid multiindex slices (:issue:`33562`) From cf9b39500fe34dc3f8837a2e1d6908a1b34589dd Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 11 Feb 2021 15:09:19 -0800 Subject: [PATCH 3/3] GH ref --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index a17f71ffb6625..86548d2d59f1b 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -450,7 +450,7 @@ Other - Bug in constructing a :class:`Series` from a list and a :class:`PandasDtype` (:issue:`39357`) - Bug in :class:`Styler` which caused CSS to duplicate on multiple renders. (:issue:`39395`) - ``inspect.getmembers(Series)`` no longer raises an ``AbstractMethodError`` (:issue:`38782`) -- Bug in :meth:`Series.where` with numeric dtype and ``other = None`` not casting to ``nan`` (:issue:`??`) +- Bug in :meth:`Series.where` with numeric dtype and ``other = None`` not casting to ``nan`` (:issue:`39761`) - :meth:`Index.where` behavior now mirrors :meth:`Index.putmask` behavior, i.e. ``index.where(mask, other)`` matches ``index.putmask(~mask, other)`` (:issue:`39412`) - Bug in :func:`pandas.testing.assert_series_equal`, :func:`pandas.testing.assert_frame_equal`, :func:`pandas.testing.assert_index_equal` and :func:`pandas.testing.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`) - Bug in :class:`Styler` where ``subset`` arg in methods raised an error for some valid multiindex slices (:issue:`33562`)