Skip to content

Commit c1c6b3e

Browse files
jbrockmendel authored and yehoshuadimarsky committed
BUG: Series[dt64].__setitem__ with all-false mask incorrectly upcasting (pandas-dev#45967)
1 parent d70eb95 commit c1c6b3e

File tree

5 files changed

+65
-15
lines changed

5 files changed

+65
-15
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ Indexing
327327
- Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`)
328328
- Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`)
329329
- Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtype :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`)
330+
- Bug in :meth:`Series.__setitem__` with ``datetime64[ns]`` dtype, an all-``False`` boolean mask, and an incompatible value incorrectly casting to ``object`` instead of retaining ``datetime64[ns]`` dtype (:issue:`45967`)
330331
- Bug in :meth:`Index.__getitem__` raising ``ValueError`` when indexer is from boolean dtype with ``NA`` (:issue:`45806`)
331332
- Bug in :meth:`Series.mask` with ``inplace=True`` or setting values with a boolean mask with small integer dtypes incorrectly raising (:issue:`45750`)
332333
- Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`)

pandas/core/array_algos/putmask.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,11 @@ def putmask_without_repeat(
8181
# TODO: this prob needs some better checking for 2D cases
8282
nlocs = mask.sum()
8383
if nlocs > 0 and is_list_like(new) and getattr(new, "ndim", 1) == 1:
84-
if nlocs == len(new):
84+
shape = np.shape(new)
85+
# use np.shape instead of len(new) for compat with the case where
86+
# setitem_datetimelike_compat changed an arraylike to a list, e.g. test_where_dt64_2d
87+
88+
if nlocs == shape[-1]:
8589
# GH#30567
8690
# If length of ``new`` is less than the length of ``values``,
8791
# `np.putmask` would first repeat the ``new`` array and then
@@ -90,7 +94,7 @@ def putmask_without_repeat(
9094
# to place in the masked locations of ``values``
9195
np.place(values, mask, new)
9296
# i.e. values[mask] = new
93-
elif mask.shape[-1] == len(new) or len(new) == 1:
97+
elif mask.shape[-1] == shape[-1] or shape[-1] == 1:
9498
np.putmask(values, mask, new)
9599
else:
96100
raise ValueError("cannot assign mismatch length to masked array")

pandas/core/internals/blocks.py

+33-11
Original file line numberDiff line numberDiff line change
@@ -1478,26 +1478,48 @@ def putmask(self, mask, new) -> list[Block]:
14781478
new = self._maybe_squeeze_arg(new)
14791479
mask = self._maybe_squeeze_arg(mask)
14801480

1481+
if not mask.any():
1482+
return [self]
1483+
14811484
try:
14821485
# Caller is responsible for ensuring matching lengths
14831486
values._putmask(mask, new)
14841487
except (TypeError, ValueError) as err:
14851488
_catch_deprecated_value_error(err)
14861489

1487-
if is_interval_dtype(self.dtype):
1488-
# Discussion about what we want to support in the general
1489-
# case GH#39584
1490-
blk = self.coerce_to_target_dtype(orig_new)
1491-
return blk.putmask(orig_mask, orig_new)
1490+
if self.ndim == 1 or self.shape[0] == 1:
14921491

1493-
elif isinstance(self, NDArrayBackedExtensionBlock):
1494-
# NB: not (yet) the same as
1495-
# isinstance(values, NDArrayBackedExtensionArray)
1496-
blk = self.coerce_to_target_dtype(orig_new)
1497-
return blk.putmask(orig_mask, orig_new)
1492+
if is_interval_dtype(self.dtype):
1493+
# Discussion about what we want to support in the general
1494+
# case GH#39584
1495+
blk = self.coerce_to_target_dtype(orig_new)
1496+
return blk.putmask(orig_mask, orig_new)
1497+
1498+
elif isinstance(self, NDArrayBackedExtensionBlock):
1499+
# NB: not (yet) the same as
1500+
# isinstance(values, NDArrayBackedExtensionArray)
1501+
blk = self.coerce_to_target_dtype(orig_new)
1502+
return blk.putmask(orig_mask, orig_new)
1503+
1504+
else:
1505+
raise
14981506

14991507
else:
1500-
raise
1508+
# Same pattern we use in Block.putmask
1509+
is_array = isinstance(orig_new, (np.ndarray, ExtensionArray))
1510+
1511+
res_blocks = []
1512+
nbs = self._split()
1513+
for i, nb in enumerate(nbs):
1514+
n = orig_new
1515+
if is_array:
1516+
# we have a different value per-column
1517+
n = orig_new[:, i : i + 1]
1518+
1519+
submask = orig_mask[:, i : i + 1]
1520+
rbs = nb.putmask(submask, n)
1521+
res_blocks.extend(rbs)
1522+
return res_blocks
15011523

15021524
return [self]
15031525

pandas/tests/frame/indexing/test_where.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -988,8 +988,16 @@ def _check_where_equivalences(df, mask, other, expected):
988988
res = df.mask(~mask, other)
989989
tm.assert_frame_equal(res, expected)
990990

991-
# Note: we cannot do the same with frame.mask(~mask, other, inplace=True)
992-
# bc that goes through Block.putmask which does *not* downcast.
991+
# Note: frame.mask(~mask, other, inplace=True) takes some more work bc
992+
# Block.putmask does *not* downcast. The change to 'expected' here
993+
# is specific to the cases in test_where_dt64_2d.
994+
df = df.copy()
995+
df.mask(~mask, other, inplace=True)
996+
if not mask.all():
997+
# with mask.all(), Block.putmask is a no-op, so does not downcast
998+
expected = expected.copy()
999+
expected["A"] = expected["A"].astype(object)
1000+
tm.assert_frame_equal(df, expected)
9931001

9941002

9951003
def test_where_dt64_2d():

pandas/tests/series/indexing/test_setitem.py

+15
Original file line numberDiff line numberDiff line change
@@ -1625,3 +1625,18 @@ def test_setitem_bool_indexer_dont_broadcast_length1_values(size, mask, item, bo
16251625
expected = Series(np.arange(size, dtype=float))
16261626
expected[selection] = item
16271627
tm.assert_series_equal(ser, expected)
1628+
1629+
1630+
def test_setitem_empty_mask_dont_upcast_dt64():
1631+
dti = date_range("2016-01-01", periods=3)
1632+
ser = Series(dti)
1633+
orig = ser.copy()
1634+
mask = np.zeros(3, dtype=bool)
1635+
1636+
ser[mask] = "foo"
1637+
assert ser.dtype == dti.dtype # no-op -> dont upcast
1638+
tm.assert_series_equal(ser, orig)
1639+
1640+
ser.mask(mask, "foo", inplace=True)
1641+
assert ser.dtype == dti.dtype # no-op -> dont upcast
1642+
tm.assert_series_equal(ser, orig)

0 commit comments

Comments
 (0)