Skip to content

API: Series[bool][key] = np.nan -> cast to object #38709

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Jan 28, 2021
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
6ee5c5f
TST: implement tm.check_setitem_equivalents
jbrockmendel Dec 24, 2020
33e39f9
re-write as fixturized
jbrockmendel Dec 26, 2020
5896615
CLN: algos.searchsorted (#38686)
jbrockmendel Dec 24, 2020
90a76ce
REF: simplify coerce_to_target_dtype (#38683)
jbrockmendel Dec 24, 2020
0827cd0
PERF: fix assert_frame_equal can be very slow (#38202)
ivanovmg Dec 24, 2020
803f495
API: cast to object when setting np.nan into Series[bool]
jbrockmendel Dec 27, 2020
055617b
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Dec 27, 2020
3be818f
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Dec 28, 2020
51ad10d
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Dec 29, 2020
279564b
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 2, 2021
e3b2cec
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 4, 2021
c4eb8e1
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 6, 2021
ce4311a
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 10, 2021
9e5cb27
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 16, 2021
c01661d
fix putmask
jbrockmendel Jan 17, 2021
23c69dc
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 17, 2021
198768e
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 18, 2021
7c6fdb7
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 19, 2021
e0c0194
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 20, 2021
f02f514
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 25, 2021
0934ad1
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 27, 2021
d966441
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Jan 27, 2021
d2ac2b3
whatsnew, test, maybe_promote->find_common_type
jbrockmendel Jan 27, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions pandas/core/array_algos/putmask.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,11 @@ def putmask_smart(values: np.ndarray, mask: np.ndarray, new) -> np.ndarray:
# preserves dtype if possible
return _putmask_preserve(values, new, mask)

# change the dtype if needed
dtype, _ = maybe_promote(new.dtype)
if values.dtype == bool and new.dtype.kind == "f":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, shouldn't `maybe_promote` handle this? (e.g. `maybe_promote(new.dtype, values.dtype)`?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wouldn't `find_common_type` make more sense?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, this is nicer; updated

dtype = object
else:
# change the dtype if needed
dtype, _ = maybe_promote(new.dtype)

values = values.astype(dtype)

Expand Down
40 changes: 4 additions & 36 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
convert_scalar_for_putitemlike,
find_common_type,
infer_dtype_from,
infer_dtype_from_scalar,
maybe_downcast_numeric,
maybe_downcast_to_dtype,
maybe_promote,
Expand Down Expand Up @@ -904,24 +903,7 @@ def setitem(self, indexer, value):
values = self.values
if not self._can_hold_element(value):
# current dtype cannot store value, coerce to common dtype
# TODO: can we just use coerce_to_target_dtype for all this
if hasattr(value, "dtype"):
dtype = value.dtype

elif lib.is_scalar(value) and not isna(value):
dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True)

else:
# e.g. we are bool dtype and value is nan
# TODO: watch out for case with listlike value and scalar/empty indexer
dtype, _ = maybe_promote(np.array(value).dtype)
return self.astype(dtype).setitem(indexer, value)

dtype = find_common_type([values.dtype, dtype])
assert not is_dtype_equal(self.dtype, dtype)
# otherwise should have _can_hold_element

return self.astype(dtype).setitem(indexer, value)
return self.coerce_to_target_dtype(value).setitem(indexer, value)

if self.dtype.kind in ["m", "M"]:
arr = self.array_values().T
Expand Down Expand Up @@ -1310,29 +1292,15 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:
else:
# see if we can operate on the entire block, or need item-by-item
# or if we are a single block (ndim == 1)
if (
(self.dtype.kind in ["b", "i", "u"])
and lib.is_float(other)
and np.isnan(other)
):
# GH#3733 special case to avoid object-dtype casting
# and go through numexpr path instead.
# In integer case, np.where will cast to floats
pass
elif not self._can_hold_element(other):
if not self._can_hold_element(other):
# we cannot coerce, return a compat dtype
# we are explicitly ignoring errors
block = self.coerce_to_target_dtype(other)
blocks = block.where(orig_other, cond, errors=errors, axis=axis)
return self._maybe_downcast(blocks, "infer")

if not (
(self.dtype.kind in ["b", "i", "u"])
and lib.is_float(other)
and np.isnan(other)
):
# convert datetime to datetime64, timedelta to timedelta64
other = convert_scalar_for_putitemlike(other, values.dtype)
# convert datetime to datetime64, timedelta to timedelta64
other = convert_scalar_for_putitemlike(other, values.dtype)

# By the time we get here, we should have all Series/Index
# args extracted to ndarray
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/indexing/test_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,13 @@ def test_mask_callable(self):
tm.assert_frame_equal(result, exp)
tm.assert_frame_equal(result, (df + 2).mask((df + 2) > 8, (df + 2) + 10))

def test_mask_dtype_conversion(self):
def test_mask_dtype_bool_conversion(self):
# GH#3733
df = DataFrame(data=np.random.randn(100, 50))
df = df.where(df > 0) # create nans
bools = df > 0
mask = isna(df)
expected = bools.astype(float).mask(mask)
expected = bools.astype(object).mask(mask)
result = bools.mask(mask)
tm.assert_frame_equal(result, expected)

Expand Down
32 changes: 22 additions & 10 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,41 +242,47 @@ def test_setitem_callable_other(self):
@pytest.mark.parametrize(
"obj,expected,key",
[
(
pytest.param(
# these induce dtype changes
Series([2, 3, 4, 5, 6, 7, 8, 9, 10]),
Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]),
slice(None, None, 2),
id="int_series_slice_key_step",
),
(
# gets coerced to float, right?
pytest.param(
Series([True, True, False, False]),
Series([np.nan, 1, np.nan, 0]),
Series([np.nan, True, np.nan, False], dtype=object),
slice(None, None, 2),
id="bool_series_slice_key_step",
),
(
pytest.param(
# these induce dtype changes
Series(np.arange(10)),
Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]),
slice(None, 5),
id="int_series_slice_key",
),
(
pytest.param(
# changes dtype GH#4463
Series([1, 2, 3]),
Series([np.nan, 2, 3]),
0,
id="int_series_int_key",
),
(
pytest.param(
# changes dtype GH#4463
Series([False]),
Series([np.nan]),
Series([np.nan], dtype=object),
# TODO: maybe go to float64 since we are changing the _whole_ Series?
0,
id="bool_series_int_key_change_all",
),
(
pytest.param(
# changes dtype GH#4463
Series([False, True]),
Series([np.nan, 1.0]),
Series([np.nan, True], dtype=object),
0,
id="bool_series_int_key",
),
],
)
Expand Down Expand Up @@ -328,6 +334,9 @@ def test_series_where(self, obj, key, expected):
tm.assert_series_equal(res, expected)

def test_index_where(self, obj, key, expected, request):
if Index(obj).dtype != obj.dtype:
pytest.skip("test not applicable for this dtype")

mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

Expand All @@ -342,6 +351,9 @@ def test_index_where(self, obj, key, expected, request):

@pytest.mark.xfail(reason="Index/Series casting behavior inconsistent GH#38692")
def test_index_putmask(self, obj, key, expected):
if Index(obj).dtype != obj.dtype:
pytest.skip("test not applicable for this dtype")

mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

Expand Down