Skip to content

API: Series[bool][key] = np.nan -> cast to object #38709

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Jan 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
6ee5c5f
TST: implement tm.check_setitem_equivalents
jbrockmendel Dec 24, 2020
33e39f9
re-write as fixturized
jbrockmendel Dec 26, 2020
5896615
CLN: algos.searchsorted (#38686)
jbrockmendel Dec 24, 2020
90a76ce
REF: simplify coerce_to_target_dtype (#38683)
jbrockmendel Dec 24, 2020
0827cd0
PERF: fix assert_frame_equal can be very slow (#38202)
ivanovmg Dec 24, 2020
803f495
API: cast to object when setting np.nan into Series[bool]
jbrockmendel Dec 27, 2020
055617b
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Dec 27, 2020
3be818f
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Dec 28, 2020
51ad10d
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Dec 29, 2020
279564b
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 2, 2021
e3b2cec
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 4, 2021
c4eb8e1
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 6, 2021
ce4311a
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 10, 2021
9e5cb27
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 16, 2021
c01661d
fix putmask
jbrockmendel Jan 17, 2021
23c69dc
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 17, 2021
198768e
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 18, 2021
7c6fdb7
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 19, 2021
e0c0194
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 20, 2021
f02f514
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 25, 2021
0934ad1
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 27, 2021
d966441
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 27, 2021
d2ac2b3
whatsnew, test, maybe_promote->find_common_type
jbrockmendel Jan 27, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,45 @@ Preserve dtypes in :meth:`~pandas.DataFrame.combine_first`
combined.dtypes


.. _whatsnew_130.notable_bug_fixes.setitem_with_bool_casting:

Consistent Casting With Setting Into Boolean Series
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Setting non-boolean values into a :class:`Series` with ``dtype=bool`` now consistently
casts to ``dtype=object`` (:issue:`38709`)

.. ipython:: python

orig = pd.Series([True, False])
ser = orig.copy()
ser.iloc[1] = np.nan
ser2 = orig.copy()
ser2.iloc[1] = 2.0

*pandas 1.2.x*

.. code-block:: ipython

In [1]: ser
Out [1]:
0 1.0
1 NaN
dtype: float64

In [2]: ser2
Out [2]:
0 True
1 2.0
dtype: object

*pandas 1.3.0*

.. ipython:: python

ser
ser2

.. _whatsnew_130.api_breaking.deps:

Increased minimum versions for dependencies
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/array_algos/putmask.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pandas._libs import lib
from pandas._typing import ArrayLike

from pandas.core.dtypes.cast import convert_scalar_for_putitemlike, maybe_promote
from pandas.core.dtypes.cast import convert_scalar_for_putitemlike, find_common_type
from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_list_like
from pandas.core.dtypes.missing import isna_compat

Expand Down Expand Up @@ -106,9 +106,7 @@ def putmask_smart(values: np.ndarray, mask: np.ndarray, new) -> np.ndarray:
# preserves dtype if possible
return _putmask_preserve(values, new, mask)

# change the dtype if needed
dtype, _ = maybe_promote(new.dtype)

dtype = find_common_type([values.dtype, new.dtype])
values = values.astype(dtype)

return _putmask_preserve(values, new, mask)
Expand Down
40 changes: 4 additions & 36 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
convert_scalar_for_putitemlike,
find_common_type,
infer_dtype_from,
infer_dtype_from_scalar,
maybe_downcast_numeric,
maybe_downcast_to_dtype,
maybe_promote,
Expand Down Expand Up @@ -904,24 +903,7 @@ def setitem(self, indexer, value):
values = self.values
if not self._can_hold_element(value):
# current dtype cannot store value, coerce to common dtype
# TODO: can we just use coerce_to_target_dtype for all this
if hasattr(value, "dtype"):
dtype = value.dtype

elif lib.is_scalar(value) and not isna(value):
dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True)

else:
# e.g. we are bool dtype and value is nan
# TODO: watch out for case with listlike value and scalar/empty indexer
dtype, _ = maybe_promote(np.array(value).dtype)
return self.astype(dtype).setitem(indexer, value)

dtype = find_common_type([values.dtype, dtype])
assert not is_dtype_equal(self.dtype, dtype)
# otherwise should have _can_hold_element

return self.astype(dtype).setitem(indexer, value)
return self.coerce_to_target_dtype(value).setitem(indexer, value)

if self.dtype.kind in ["m", "M"]:
arr = self.array_values().T
Expand Down Expand Up @@ -1310,29 +1292,15 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:
else:
# see if we can operate on the entire block, or need item-by-item
# or if we are a single block (ndim == 1)
if (
(self.dtype.kind in ["b", "i", "u"])
and lib.is_float(other)
and np.isnan(other)
):
# GH#3733 special case to avoid object-dtype casting
# and go through numexpr path instead.
# In integer case, np.where will cast to floats
pass
elif not self._can_hold_element(other):
if not self._can_hold_element(other):
# we cannot coerce, return a compat dtype
# we are explicitly ignoring errors
block = self.coerce_to_target_dtype(other)
blocks = block.where(orig_other, cond, errors=errors, axis=axis)
return self._maybe_downcast(blocks, "infer")

if not (
(self.dtype.kind in ["b", "i", "u"])
and lib.is_float(other)
and np.isnan(other)
):
# convert datetime to datetime64, timedelta to timedelta64
other = convert_scalar_for_putitemlike(other, values.dtype)
# convert datetime to datetime64, timedelta to timedelta64
other = convert_scalar_for_putitemlike(other, values.dtype)

# By the time we get here, we should have all Series/Index
# args extracted to ndarray
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/indexing/test_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,13 @@ def test_mask_callable(self):
tm.assert_frame_equal(result, exp)
tm.assert_frame_equal(result, (df + 2).mask((df + 2) > 8, (df + 2) + 10))

def test_mask_dtype_conversion(self):
def test_mask_dtype_bool_conversion(self):
# GH#3733
df = DataFrame(data=np.random.randn(100, 50))
df = df.where(df > 0) # create nans
bools = df > 0
mask = isna(df)
expected = bools.astype(float).mask(mask)
expected = bools.astype(object).mask(mask)
result = bools.mask(mask)
tm.assert_frame_equal(result, expected)

Expand Down
68 changes: 44 additions & 24 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,41 +242,47 @@ def test_setitem_callable_other(self):
@pytest.mark.parametrize(
"obj,expected,key",
[
(
pytest.param(
# these induce dtype changes
Series([2, 3, 4, 5, 6, 7, 8, 9, 10]),
Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]),
slice(None, None, 2),
id="int_series_slice_key_step",
),
(
# gets coerced to float, right?
pytest.param(
Series([True, True, False, False]),
Series([np.nan, 1, np.nan, 0]),
Series([np.nan, True, np.nan, False], dtype=object),
slice(None, None, 2),
id="bool_series_slice_key_step",
),
(
pytest.param(
# these induce dtype changes
Series(np.arange(10)),
Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]),
slice(None, 5),
id="int_series_slice_key",
),
(
pytest.param(
# changes dtype GH#4463
Series([1, 2, 3]),
Series([np.nan, 2, 3]),
0,
id="int_series_int_key",
),
(
pytest.param(
# changes dtype GH#4463
Series([False]),
Series([np.nan]),
Series([np.nan], dtype=object),
# TODO: maybe go to float64 since we are changing the _whole_ Series?
0,
id="bool_series_int_key_change_all",
),
(
pytest.param(
# changes dtype GH#4463
Series([False, True]),
Series([np.nan, 1.0]),
Series([np.nan, True], dtype=object),
0,
id="bool_series_int_key",
),
],
)
Expand All @@ -289,45 +295,56 @@ class TestSetitemCastingEquivalents:
- the setitem does not expand the obj
"""

def test_int_key(self, obj, key, expected, indexer_sli):
@pytest.fixture(params=[np.nan, np.float64("NaN")])
def val(self, request):
"""
One python float NaN, one np.float64. Only np.float64 has a `dtype`
attribute.
"""
return request.param

def test_int_key(self, obj, key, expected, val, indexer_sli):
if not isinstance(key, int):
return

obj = obj.copy()
indexer_sli(obj)[key] = np.nan
indexer_sli(obj)[key] = val
tm.assert_series_equal(obj, expected)

def test_slice_key(self, obj, key, expected, indexer_si):
def test_slice_key(self, obj, key, expected, val, indexer_si):
# Note: no .loc because that handles slice edges differently
obj = obj.copy()
indexer_si(obj)[key] = np.nan
indexer_si(obj)[key] = val
tm.assert_series_equal(obj, expected)

def test_intlist_key(self, obj, key, expected, indexer_sli):
def test_intlist_key(self, obj, key, expected, val, indexer_sli):
ilkey = list(range(len(obj)))[key]

obj = obj.copy()
indexer_sli(obj)[ilkey] = np.nan
indexer_sli(obj)[ilkey] = val
tm.assert_series_equal(obj, expected)

def test_mask_key(self, obj, key, expected, indexer_sli):
def test_mask_key(self, obj, key, expected, val, indexer_sli):
# setitem with boolean mask
mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

obj = obj.copy()
indexer_sli(obj)[mask] = np.nan
indexer_sli(obj)[mask] = val
tm.assert_series_equal(obj, expected)

def test_series_where(self, obj, key, expected):
def test_series_where(self, obj, key, expected, val):
mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

obj = obj.copy()
res = obj.where(~mask, np.nan)
res = obj.where(~mask, val)
tm.assert_series_equal(res, expected)

def test_index_where(self, obj, key, expected, request):
def test_index_where(self, obj, key, expected, val, request):
if Index(obj).dtype != obj.dtype:
pytest.skip("test not applicable for this dtype")

mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

Expand All @@ -337,15 +354,18 @@ def test_index_where(self, obj, key, expected, request):
mark = pytest.mark.xfail(reason=msg)
request.node.add_marker(mark)

res = Index(obj).where(~mask, np.nan)
res = Index(obj).where(~mask, val)
tm.assert_index_equal(res, Index(expected))

@pytest.mark.xfail(reason="Index/Series casting behavior inconsistent GH#38692")
def test_index_putmask(self, obj, key, expected):
def test_index_putmask(self, obj, key, expected, val):
if Index(obj).dtype != obj.dtype:
pytest.skip("test not applicable for this dtype")

mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

res = Index(obj).putmask(mask, np.nan)
res = Index(obj).putmask(mask, val)
tm.assert_index_equal(res, Index(expected))


Expand Down