Skip to content

Commit 1779155

Browse files
authored
BUG: Series.where not casting None to nan (#39761)
1 parent 0a88eaa commit 1779155

File tree

4 files changed

+73
-50
lines changed

4 files changed

+73
-50
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -451,6 +451,7 @@ Other
451451
- Bug in constructing a :class:`Series` from a list and a :class:`PandasDtype` (:issue:`39357`)
452452
- Bug in :class:`Styler` which caused CSS to duplicate on multiple renders. (:issue:`39395`)
453453
- ``inspect.getmembers(Series)`` no longer raises an ``AbstractMethodError`` (:issue:`38782`)
454+
- Bug in :meth:`Series.where` with numeric dtype and ``other = None`` not casting to ``nan`` (:issue:`39761`)
454455
- :meth:`Index.where` behavior now mirrors :meth:`Index.putmask` behavior, i.e. ``index.where(mask, other)`` matches ``index.putmask(~mask, other)`` (:issue:`39412`)
455456
- Bug in :func:`pandas.testing.assert_series_equal`, :func:`pandas.testing.assert_frame_equal`, :func:`pandas.testing.assert_index_equal` and :func:`pandas.testing.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`)
456457
- Bug in :class:`Styler` where ``subset`` arg in methods raised an error for some valid multiindex slices (:issue:`33562`)

pandas/core/internals/blocks.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@
4848
)
4949
from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype, PandasDtype
5050
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex, ABCPandasArray, ABCSeries
51-
from pandas.core.dtypes.missing import isna
51+
from pandas.core.dtypes.missing import is_valid_na_for_dtype, isna
5252

5353
import pandas.core.algorithms as algos
5454
from pandas.core.array_algos.putmask import (
@@ -1298,6 +1298,9 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:
12981298

12991299
cond = _extract_bool_array(cond)
13001300

1301+
if is_valid_na_for_dtype(other, self.dtype) and not self.is_object:
1302+
other = self.fill_value
1303+
13011304
if cond.ravel("K").all():
13021305
result = values
13031306
else:

pandas/tests/indexing/test_indexing.py

-47
Original file line number | Diff line number | Diff line change
@@ -837,53 +837,6 @@ def test_label_indexing_on_nan(self):
837837
assert result2 == expected
838838

839839

840-
class TestSeriesNoneCoercion:
841-
EXPECTED_RESULTS = [
842-
# For numeric series, we should coerce to NaN.
843-
([1, 2, 3], [np.nan, 2, 3]),
844-
([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]),
845-
# For datetime series, we should coerce to NaT.
846-
(
847-
[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
848-
[NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)],
849-
),
850-
# For objects, we should preserve the None value.
851-
(["foo", "bar", "baz"], [None, "bar", "baz"]),
852-
]
853-
854-
@pytest.mark.parametrize("start_data,expected_result", EXPECTED_RESULTS)
855-
def test_coercion_with_setitem(self, start_data, expected_result):
856-
start_series = Series(start_data)
857-
start_series[0] = None
858-
859-
expected_series = Series(expected_result)
860-
tm.assert_series_equal(start_series, expected_series)
861-
862-
@pytest.mark.parametrize("start_data,expected_result", EXPECTED_RESULTS)
863-
def test_coercion_with_loc_setitem(self, start_data, expected_result):
864-
start_series = Series(start_data)
865-
start_series.loc[0] = None
866-
867-
expected_series = Series(expected_result)
868-
tm.assert_series_equal(start_series, expected_series)
869-
870-
@pytest.mark.parametrize("start_data,expected_result", EXPECTED_RESULTS)
871-
def test_coercion_with_setitem_and_series(self, start_data, expected_result):
872-
start_series = Series(start_data)
873-
start_series[start_series == start_series[0]] = None
874-
875-
expected_series = Series(expected_result)
876-
tm.assert_series_equal(start_series, expected_series)
877-
878-
@pytest.mark.parametrize("start_data,expected_result", EXPECTED_RESULTS)
879-
def test_coercion_with_loc_and_series(self, start_data, expected_result):
880-
start_series = Series(start_data)
881-
start_series.loc[start_series == start_series[0]] = None
882-
883-
expected_series = Series(expected_result)
884-
tm.assert_series_equal(start_series, expected_series)
885-
886-
887840
class TestDataframeNoneCoercion:
888841
EXPECTED_SINGLE_ROW_RESULTS = [
889842
# For numeric series, we should coerce to NaN.

pandas/tests/series/indexing/test_setitem.py

+68 -2
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
from datetime import date
1+
from datetime import date, datetime
22

33
import numpy as np
44
import pytest
@@ -297,7 +297,12 @@ def _check_inplace(self, is_inplace, orig, arr, obj):
297297
# We are not (yet) checking whether setting is inplace or not
298298
pass
299299
elif is_inplace:
300-
assert obj._values is arr
300+
if arr.dtype.kind in ["m", "M"]:
301+
# We may not have the same DTA/TDA, but will have the same
302+
# underlying data
303+
assert arr._data is obj._values._data
304+
else:
305+
assert obj._values is arr
301306
else:
302307
# otherwise original array should be unchanged
303308
tm.assert_equal(arr, orig._values)
@@ -635,6 +640,37 @@ def is_inplace(self):
635640
return True
636641

637642

643+
class TestSetitemNATimedelta64Dtype(SetitemCastingEquivalents):
644+
# some nat-like values should be cast to timedelta64 when inserting
645+
# into a timedelta64 series. Others should coerce to object
646+
# and retain their dtypes.
647+
648+
@pytest.fixture
649+
def obj(self):
650+
return Series([0, 1, 2], dtype="m8[ns]")
651+
652+
@pytest.fixture(
653+
params=[NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")]
654+
)
655+
def val(self, request):
656+
return request.param
657+
658+
@pytest.fixture
659+
def is_inplace(self, val):
660+
# cast to object iff val is datetime64("NaT")
661+
return val is NaT or val.dtype.kind == "m"
662+
663+
@pytest.fixture
664+
def expected(self, obj, val, is_inplace):
665+
dtype = obj.dtype if is_inplace else object
666+
expected = Series([val] + list(obj[1:]), dtype=dtype)
667+
return expected
668+
669+
@pytest.fixture
670+
def key(self):
671+
return 0
672+
673+
638674
class TestSetitemMismatchedTZCastsToObject(SetitemCastingEquivalents):
639675
# GH#24024
640676
@pytest.fixture
@@ -659,3 +695,33 @@ def expected(self):
659695
dtype=object,
660696
)
661697
return expected
698+
699+
700+
@pytest.mark.parametrize(
701+
"obj,expected",
702+
[
703+
# For numeric series, we should coerce to NaN.
704+
(Series([1, 2, 3]), Series([np.nan, 2, 3])),
705+
(Series([1.0, 2.0, 3.0]), Series([np.nan, 2.0, 3.0])),
706+
# For datetime series, we should coerce to NaT.
707+
(
708+
Series([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]),
709+
Series([NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]),
710+
),
711+
# For objects, we should preserve the None value.
712+
(Series(["foo", "bar", "baz"]), Series([None, "bar", "baz"])),
713+
],
714+
)
715+
class TestSeriesNoneCoercion(SetitemCastingEquivalents):
716+
@pytest.fixture
717+
def key(self):
718+
return 0
719+
720+
@pytest.fixture
721+
def val(self):
722+
return None
723+
724+
@pytest.fixture
725+
def is_inplace(self, obj):
726+
# This is specific to the 4 cases currently implemented for this class.
727+
return obj.dtype.kind != "i"

0 commit comments

Comments
 (0)