Skip to content

BUG: Series.where with incompatible NA value #44697

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 31 commits into from
Dec 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
ac09146
BUG: IntegerArray/FloatingArray constructors mismatched NAs
jbrockmendel Nov 18, 2021
1166725
Whatsnew, GH ref
jbrockmendel Nov 18, 2021
21b6977
mypy fixup
jbrockmendel Nov 18, 2021
742d321
Merge branch 'master' into bug-nullable-construction
jbrockmendel Nov 20, 2021
020c4e2
Merge branch 'master' into bug-nullable-construction
jbrockmendel Nov 20, 2021
a4d89ce
xfail on old numpy
jbrockmendel Nov 20, 2021
d322af3
xfail ArrayManager
jbrockmendel Nov 20, 2021
67d615d
update tested exception message for py310
jbrockmendel Nov 20, 2021
6350d8e
Merge branch 'master' into bug-nullable-construction
jbrockmendel Nov 21, 2021
117aef7
xfail on later numpy
jbrockmendel Nov 21, 2021
0f17b88
Merge branch 'master' into bug-nullable-construction
jbrockmendel Nov 22, 2021
2a2f8d2
use decorator
jbrockmendel Nov 22, 2021
4be676d
Merge branch 'master' into bug-nullable-construction
jbrockmendel Nov 23, 2021
9cd5047
Merge branch 'master' into bug-nullable-construction
jbrockmendel Nov 25, 2021
0d77cea
Merge branch 'master' into bug-nullable-construction
jbrockmendel Nov 27, 2021
48a4531
raise in is_numeric_na
jbrockmendel Nov 27, 2021
745d24f
fixup unused import
jbrockmendel Nov 27, 2021
b357a95
BUG: IntegerArray/FloatingArray constructors mismatched NAs
jbrockmendel Nov 18, 2021
df0d4c5
Whatsnew, GH ref
jbrockmendel Nov 18, 2021
421c4fe
mypy fixup
jbrockmendel Nov 18, 2021
d967af5
BUG: Series.where with incompatible NA value
jbrockmendel Dec 1, 2021
b8def17
Merge branch 'bug-nullable-construction' into bug-where
jbrockmendel Dec 1, 2021
771356a
GH refs
jbrockmendel Dec 1, 2021
d118be8
Merge branch 'master' into bug-where
jbrockmendel Dec 1, 2021
34f65d6
update exception message
jbrockmendel Dec 1, 2021
95059d6
Merge branch 'master' into bug-where
jbrockmendel Dec 1, 2021
e6f6331
xfail on old numpy
jbrockmendel Dec 1, 2021
d96d637
Merge branch 'master' into bug-where
jbrockmendel Dec 2, 2021
06e44fa
remove duplicated note
jbrockmendel Dec 2, 2021
fc158a6
remove duplicated note
jbrockmendel Dec 2, 2021
1c1f534
Merge branch 'master' into bug-where
jbrockmendel Dec 2, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,7 @@ ExtensionArray
- Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`)
- Bug in :class:`IntegerArray` and :class:`FloatingArray` construction incorrectly coercing mismatched NA values (e.g. ``np.timedelta64("NaT")``) to numeric NA (:issue:`44514`)
- Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`)
- Bug in :meth:`Series.where` with ``ExtensionDtype`` when ``other`` is a NA scalar incompatible with the series dtype (e.g. ``NaT`` with a numeric dtype) incorrectly casting to a compatible NA value (:issue:`44697`)
-

Styler
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -10857,7 +10857,7 @@ def interpolate(
def where(
self,
cond,
other=np.nan,
other=lib.no_default,
inplace=False,
axis=None,
level=None,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8937,7 +8937,7 @@ def _align_series(
def _where(
self,
cond,
other=np.nan,
other=lib.no_default,
inplace=False,
axis=None,
level=None,
Expand Down
17 changes: 10 additions & 7 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,9 @@ def putmask(self, mask, new) -> list[Block]:
mask, noop = validate_putmask(values.T, mask)
assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame))

if new is lib.no_default:
new = self.fill_value

# if we are passed a scalar None, convert it here
if not self.is_object and is_valid_na_for_dtype(new, self.dtype):
new = self.fill_value
Expand Down Expand Up @@ -1173,6 +1176,9 @@ def where(self, other, cond) -> list[Block]:

icond, noop = validate_putmask(values, ~cond)

if other is lib.no_default:
other = self.fill_value

if is_valid_na_for_dtype(other, self.dtype) and self.dtype != _dtype_obj:
other = self.fill_value

Expand Down Expand Up @@ -1640,13 +1646,8 @@ def where(self, other, cond) -> list[Block]:
other = self._maybe_squeeze_arg(other)
cond = self._maybe_squeeze_arg(cond)

if lib.is_scalar(other) and isna(other):
# The default `other` for Series / Frame is np.nan
# we want to replace that with the correct NA value
# for the type
# error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has no
# attribute "na_value"
other = self.dtype.na_value # type: ignore[union-attr]
if other is lib.no_default:
other = self.fill_value

icond, noop = validate_putmask(self.values, ~cond)
if noop:
Expand Down Expand Up @@ -1741,6 +1742,8 @@ def where(self, other, cond) -> list[Block]:
arr = self.values

cond = extract_bool_array(cond)
if other is lib.no_default:
other = self.fill_value

try:
res_values = arr.T._where(cond, other).T
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5510,7 +5510,7 @@ def interpolate(
def where(
self,
cond,
other=np.nan,
other=lib.no_default,
inplace=False,
axis=None,
level=None,
Expand Down
12 changes: 7 additions & 5 deletions pandas/tests/arrays/integer/test_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,13 @@ def test_to_integer_array_none_is_nan(a, b):
)
def test_to_integer_array_error(values):
# error in converting existing arrays to IntegerArrays
msg = (
r"(:?.* cannot be converted to an IntegerDtype)"
r"|(invalid literal for int\(\) with base 10: .*)"
r"|(:?values must be a 1D list-like)"
r"|(Cannot pass scalar)"
msg = "|".join(
[
r"cannot be converted to an IntegerDtype",
r"invalid literal for int\(\) with base 10:",
r"values must be a 1D list-like",
r"Cannot pass scalar",
]
)
with pytest.raises((ValueError, TypeError), match=msg):
pd.array(values, dtype="Int64")
Expand Down
64 changes: 64 additions & 0 deletions pandas/tests/frame/indexing/test_where.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import numpy as np
import pytest

from pandas.compat import np_version_under1p19

from pandas.core.dtypes.common import is_scalar

import pandas as pd
Expand Down Expand Up @@ -810,6 +812,68 @@ def test_where_columns_casting():
tm.assert_frame_equal(expected, result)


@pytest.mark.parametrize("as_cat", [True, False])
def test_where_period_invalid_na(frame_or_series, as_cat, request):
    # GH#44697
    # Both ``where`` and ``mask`` must raise TypeError when ``other`` is a
    # timedelta64 NaT, rather than silently treating it as a Period NA.
    periods = pd.period_range("2016-01-01", periods=3, freq="D")
    values = periods.astype("category") if as_cat else periods
    obj = frame_or_series(values)

    # An NA scalar of the wrong "flavor": it must *not* be cast to Period dtype
    wrong_na = pd.NaT.to_numpy("m8[ns]")

    # ndmin/.T give the boolean mask the same dimensionality as ``obj``
    cond = np.array([True, True, False], ndmin=obj.ndim).T

    if not as_cat:
        expected_msg = "value should be a 'Period'"
    else:
        expected_msg = (
            r"Cannot setitem on a Categorical with a new category \(NaT\), "
            "set the categories first"
        )
        if np_version_under1p19:
            # On old numpy the categorical path raises TypeError with a
            # different message, so the match below is expected to fail.
            request.node.add_marker(
                pytest.mark.xfail(
                    reason="When evaluating the f-string to generate the exception "
                    "message, numpy somehow ends up trying to cast None to int, so "
                    "ends up raising TypeError but with an unrelated message."
                )
            )

    # Same order as before: ``where`` is checked first, then ``mask``
    for method_name in ("where", "mask"):
        with pytest.raises(TypeError, match=expected_msg):
            getattr(obj, method_name)(cond, wrong_na)


def test_where_nullable_invalid_na(frame_or_series, any_numeric_ea_dtype):
    # GH#44697
    # ``where``/``mask`` on a nullable numeric array must raise TypeError for
    # NaT-like ``other`` values instead of casting them to pd.NA.
    obj = frame_or_series(pd.array([1, 2, 3], dtype=any_numeric_ea_dtype))

    # ndmin/.T give the boolean mask the same dimensionality as ``obj``
    cond = np.array([True, True, False], ndmin=obj.ndim).T

    # Any one of these messages is accepted; which one is raised presumably
    # depends on the dtype and on the kind of NaT passed -- TODO confirm.
    accepted_patterns = (
        r"datetime64\[.{1,2}\] cannot be converted to an? (Integer|Floating)Dtype",
        r"timedelta64\[.{1,2}\] cannot be converted to an? (Integer|Floating)Dtype",
        r"int\(\) argument must be a string, a bytes-like object or a number, "
        "not 'NaTType'",
        "object cannot be converted to a FloatingDtype",
        "'values' contains non-numeric NA",
    )
    expected_msg = "|".join(accepted_patterns)

    # NaT is an NA value that we should *not* cast to pd.NA dtype
    invalid_nulls = [*tm.NP_NAT_OBJECTS, pd.NaT]
    for bad_na in invalid_nulls:
        for method_name in ("where", "mask"):
            with pytest.raises(TypeError, match=expected_msg):
                getattr(obj, method_name)(cond, bad_na)


@given(
data=st.one_of(
OPTIONAL_DICTS, OPTIONAL_FLOATS, OPTIONAL_INTS, OPTIONAL_LISTS, OPTIONAL_TEXT
Expand Down