Skip to content

Commit 3875231

Browse files
authored
BUG: Series.mask with small int dtypes raising (#45750)
1 parent 419331c commit 3875231

File tree

5 files changed

+48
-65
lines changed

5 files changed

+48
-65
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ Indexing
271271
- Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`)
272272
- Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`)
273273
- Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtype :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`)
274+
- Bug in :meth:`Series.mask` with ``inplace=True`` or setting values with a boolean mask with small integer dtypes incorrectly raising (:issue:`45750`)
274275
- Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`)
275276
- Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`)
276277
- Bug in indexing on a :class:`DatetimeIndex` with a ``np.str_`` key incorrectly raising (:issue:`45580`)

pandas/core/array_algos/putmask.py

-55
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@
1515
from pandas.compat import np_version_under1p20
1616

1717
from pandas.core.dtypes.cast import (
18-
can_hold_element,
1918
convert_scalar_for_putitemlike,
20-
find_common_type,
2119
infer_dtype_from,
2220
)
2321
from pandas.core.dtypes.common import is_list_like
@@ -61,59 +59,6 @@ def putmask_inplace(values: ArrayLike, mask: npt.NDArray[np.bool_], value: Any)
6159
np.putmask(values, mask, value)
6260

6361

64-
def putmask_smart(values: np.ndarray, mask: npt.NDArray[np.bool_], new) -> np.ndarray:
65-
"""
66-
Return a new ndarray, try to preserve dtype if possible.
67-
68-
Parameters
69-
----------
70-
values : np.ndarray
71-
`values`, updated in-place.
72-
mask : np.ndarray[bool]
73-
Applies to both sides (array like).
74-
new : listlike `new values` aligned with `values`
75-
76-
Returns
77-
-------
78-
values : ndarray with updated values
79-
this *may* be a copy of the original
80-
81-
See Also
82-
--------
83-
np.putmask
84-
"""
85-
# we cannot use np.asarray() here as we cannot have conversions
86-
# that numpy does when numeric are mixed with strings
87-
88-
# see if we are only masking values that if putted
89-
# will work in the current dtype
90-
try:
91-
nn = new[mask]
92-
except TypeError:
93-
# TypeError: only integer scalar arrays can be converted to a scalar index
94-
pass
95-
else:
96-
# We only get to putmask_smart when we cannot hold 'new' in values.
97-
# The "smart" part of putmask_smart is checking if we can hold new[mask]
98-
# in values, in which case we can still avoid the need to cast.
99-
if can_hold_element(values, nn):
100-
values[mask] = nn
101-
return values
102-
103-
new = np.asarray(new)
104-
105-
if values.dtype.kind == new.dtype.kind:
106-
# preserves dtype if possible
107-
np.putmask(values, mask, new)
108-
return values
109-
110-
dtype = find_common_type([values.dtype, new.dtype])
111-
values = values.astype(dtype)
112-
113-
np.putmask(values, mask, new)
114-
return values
115-
116-
11762
def putmask_without_repeat(
11863
values: np.ndarray, mask: npt.NDArray[np.bool_], new: Any
11964
) -> None:

pandas/core/dtypes/cast.py

-1
Original file line numberDiff line numberDiff line change
@@ -1980,7 +1980,6 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
19801980
if tipo.kind not in ["i", "u"]:
19811981
if isinstance(element, np.ndarray) and element.dtype.kind == "f":
19821982
# If all can be losslessly cast to integers, then we can hold them
1983-
# We do something similar in putmask_smart
19841983
casted = element.astype(dtype)
19851984
comp = casted == element
19861985
if comp.all():

pandas/core/internals/blocks.py

+5 -9
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@
7575
from pandas.core.array_algos.putmask import (
7676
extract_bool_array,
7777
putmask_inplace,
78-
putmask_smart,
7978
putmask_without_repeat,
8079
setitem_datetimelike_compat,
8180
validate_putmask,
@@ -978,15 +977,12 @@ def putmask(self, mask, new) -> list[Block]:
978977
# no need to split columns
979978

980979
if not is_list_like(new):
981-
# putmask_smart can't save us the need to cast
980+
# using just new[indexer] can't save us the need to cast
982981
return self.coerce_to_target_dtype(new).putmask(mask, new)
983-
984-
# This differs from
985-
# `self.coerce_to_target_dtype(new).putmask(mask, new)`
986-
# because putmask_smart will check if new[mask] may be held
987-
# by our dtype.
988-
nv = putmask_smart(values.T, mask, new).T
989-
return [self.make_block(nv)]
982+
else:
983+
indexer = mask.nonzero()[0]
984+
nb = self.setitem(indexer, new[indexer])
985+
return [nb]
990986

991987
else:
992988
is_array = isinstance(new, np.ndarray)

pandas/tests/series/indexing/test_setitem.py

+42
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,48 @@ def test_setitem_nan_with_bool(self):
364364
expected = Series([np.nan, False, True], dtype=object)
365365
tm.assert_series_equal(result, expected)
366366

367+
def test_setitem_mask_smallint_upcast(self):
368+
orig = Series([1, 2, 3], dtype="int8")
369+
alt = np.array([999, 1000, 1001], dtype=np.int64)
370+
371+
mask = np.array([True, False, True])
372+
373+
ser = orig.copy()
374+
ser[mask] = Series(alt)
375+
expected = Series([999, 2, 1001])
376+
tm.assert_series_equal(ser, expected)
377+
378+
ser2 = orig.copy()
379+
ser2.mask(mask, alt, inplace=True)
380+
tm.assert_series_equal(ser2, expected)
381+
382+
ser3 = orig.copy()
383+
res = ser3.where(~mask, Series(alt))
384+
tm.assert_series_equal(res, expected)
385+
386+
def test_setitem_mask_smallint_no_upcast(self):
387+
# like test_setitem_mask_smallint_upcast, but while we can't hold 'alt',
388+
# we *can* hold alt[mask] without casting
389+
orig = Series([1, 2, 3], dtype="uint8")
390+
alt = Series([245, 1000, 246], dtype=np.int64)
391+
392+
mask = np.array([True, False, True])
393+
394+
ser = orig.copy()
395+
ser[mask] = alt
396+
expected = Series([245, 2, 246], dtype="uint8")
397+
tm.assert_series_equal(ser, expected)
398+
399+
ser2 = orig.copy()
400+
ser2.mask(mask, alt, inplace=True)
401+
tm.assert_series_equal(ser2, expected)
402+
403+
# FIXME: don't leave commented-out
404+
# FIXME: ser.where(~mask, alt) unnecessarily upcasts to int64
405+
# ser3 = orig.copy()
406+
# res = ser3.where(~mask, alt)
407+
# tm.assert_series_equal(res, expected)
408+
367409

368410
class TestSetitemViewCopySemantics:
369411
def test_setitem_invalidates_datetime_index_freq(self):

0 commit comments

Comments
 (0)