Skip to content

Commit 4861964

Browse files
authored
BUG: Series[int].__setitem__(mask, td64_or_dt64) incorrect casting (#39619)
1 parent 87f72bb commit 4861964

File tree

5 files changed

+92
-70
lines changed

5 files changed

+92
-70
lines changed

doc/source/whatsnew/v1.3.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ Indexing
339339
- Bug in :meth:`Series.__setitem__` raising ``ValueError`` when setting a :class:`Series` with a scalar indexer (:issue:`38303`)
340340
- Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`)
341341
- Bug in :meth:`DataFrame.__getitem__` and :meth:`Series.__getitem__` always raising ``KeyError`` when slicing with existing strings where the :class:`Index` has milliseconds (:issue:`33589`)
342-
- Bug in setting ``timedelta64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`)
342+
- Bug in setting ``timedelta64`` or ``datetime64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`, :issue:`39619`)
343343
- Bug in setting :class:`Interval` values into a :class:`Series` or :class:`DataFrame` with mismatched :class:`IntervalDtype` incorrectly casting the new values to the existing dtype (:issue:`39120`)
344344
- Bug in setting ``datetime64`` values into a :class:`Series` with integer-dtype incorrectly casting the datetime64 values to integers (:issue:`39266`)
345345
- Bug in :meth:`Index.get_loc` not raising ``KeyError`` when method is specified for ``NaN`` value when ``NaN`` is not in :class:`Index` (:issue:`39382`)

pandas/core/generic.py

+5-26
Original file line numberDiff line numberDiff line change
@@ -8875,32 +8875,11 @@ def _where(
88758875
if isinstance(other, (np.ndarray, ExtensionArray)):
88768876

88778877
if other.shape != self.shape:
8878-
8879-
if self.ndim == 1:
8880-
8881-
icond = cond._values
8882-
8883-
# GH 2745 / GH 4192
8884-
# treat like a scalar
8885-
if len(other) == 1:
8886-
other = other[0]
8887-
8888-
# GH 3235
8889-
# match True cond to other
8890-
elif len(cond[icond]) == len(other):
8891-
8892-
# try to not change dtype at first
8893-
new_other = self._values
8894-
new_other = new_other.copy()
8895-
new_other[icond] = other
8896-
other = new_other
8897-
8898-
else:
8899-
raise ValueError(
8900-
"Length of replacements must equal series length"
8901-
)
8902-
8903-
else:
8878+
if self.ndim != 1:
8879+
# In the ndim == 1 case we may have
8880+
# other length 1, which we treat as scalar (GH#2745, GH#4192)
8881+
# or len(other) == icond.sum(), which we treat like
8882+
# __setitem__ (GH#3235)
89048883
raise ValueError(
89058884
"other must be the same shape as self when an ndarray"
89068885
)

pandas/core/indexes/base.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -4550,11 +4550,16 @@ def putmask(self, mask, value):
45504550
return self.astype(dtype).putmask(mask, value)
45514551

45524552
values = self._values.copy()
4553-
if isinstance(converted, np.timedelta64) and self.dtype == object:
4553+
dtype, _ = infer_dtype_from(converted, pandas_dtype=True)
4554+
if dtype.kind in ["m", "M"]:
45544555
# https://github.com/numpy/numpy/issues/12550
45554556
# timedelta64 will incorrectly cast to int
4556-
converted = [converted] * mask.sum()
4557-
values[mask] = converted
4557+
if not is_list_like(converted):
4558+
converted = [converted] * mask.sum()
4559+
values[mask] = converted
4560+
else:
4561+
converted = list(converted)
4562+
np.putmask(values, mask, converted)
45584563
else:
45594564
np.putmask(values, mask, converted)
45604565

pandas/core/internals/blocks.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -1031,7 +1031,8 @@ def putmask(self, mask, new) -> List[Block]:
10311031
elif not mask.any():
10321032
return [self]
10331033

1034-
elif isinstance(new, np.timedelta64):
1034+
dtype, _ = infer_dtype_from(new)
1035+
if dtype.kind in ["m", "M"]:
10351036
# using putmask with object dtype will incorrectly cast to object
10361037
# Having excluded self._can_hold_element, we know we cannot operate
10371038
# in-place, so we are safe using `where`
@@ -1317,10 +1318,15 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:
13171318
blocks = block.where(orig_other, cond, errors=errors, axis=axis)
13181319
return self._maybe_downcast(blocks, "infer")
13191320

1320-
elif isinstance(other, np.timedelta64):
1321-
# expressions.where will cast np.timedelta64 to int
1322-
result = self.values.copy()
1323-
result[~cond] = [other] * (~cond).sum()
1321+
dtype, _ = infer_dtype_from(other, pandas_dtype=True)
1322+
if dtype.kind in ["m", "M"] and dtype.kind != values.dtype.kind:
1323+
# expressions.where would cast np.timedelta64 to int
1324+
if not is_list_like(other):
1325+
other = [other] * (~cond).sum()
1326+
else:
1327+
other = list(other)
1328+
result = values.copy()
1329+
np.putmask(result, ~cond, other)
13241330

13251331
else:
13261332
# convert datetime to datetime64, timedelta to timedelta64

pandas/tests/series/indexing/test_setitem.py

+67-35
Original file line numberDiff line numberDiff line change
@@ -74,18 +74,6 @@ def test_setitem_tuple_with_datetimetz_values(self):
7474
tm.assert_series_equal(result, expected)
7575

7676

77-
class TestSetitemPeriodDtype:
78-
@pytest.mark.parametrize("na_val", [None, np.nan])
79-
def test_setitem_na_period_dtype_casts_to_nat(self, na_val):
80-
ser = Series(period_range("2000-01-01", periods=10, freq="D"))
81-
82-
ser[3] = na_val
83-
assert ser[3] is NaT
84-
85-
ser[3:5] = na_val
86-
assert ser[4] is NaT
87-
88-
8977
class TestSetitemScalarIndexer:
9078
def test_setitem_negative_out_of_bounds(self):
9179
ser = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10))
@@ -259,29 +247,6 @@ def test_setitem_callable_other(self):
259247

260248

261249
class TestSetitemCasting:
262-
@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
263-
def test_setitem_dt64_into_int_series(self, dtype):
264-
# dont cast dt64 to int when doing this setitem
265-
orig = Series([1, 2, 3])
266-
267-
val = np.datetime64("2021-01-18 13:25:00", "ns")
268-
if dtype == "m8[ns]":
269-
val = val - val
270-
271-
ser = orig.copy()
272-
ser[:-1] = val
273-
expected = Series([val, val, 3], dtype=object)
274-
tm.assert_series_equal(ser, expected)
275-
assert isinstance(ser[0], type(val))
276-
277-
ser = orig.copy()
278-
ser[:-1] = [val, val]
279-
tm.assert_series_equal(ser, expected)
280-
281-
ser = orig.copy()
282-
ser[:-1] = np.array([val, val])
283-
tm.assert_series_equal(ser, expected)
284-
285250
@pytest.mark.parametrize("unique", [True, False])
286251
@pytest.mark.parametrize("val", [3, 3.0, "3"], ids=type)
287252
def test_setitem_non_bool_into_bool(self, val, indexer_sli, unique):
@@ -599,3 +564,70 @@ def is_inplace(self):
599564
Indicate we do _not_ expect the setting to be done inplace.
600565
"""
601566
return False
567+
568+
569+
class TestSetitemDT64IntoInt(SetitemCastingEquivalents):
570+
# GH#39619 don't cast dt64 to int when doing this setitem
571+
572+
@pytest.fixture(params=["M8[ns]", "m8[ns]"])
573+
def dtype(self, request):
574+
return request.param
575+
576+
@pytest.fixture
577+
def scalar(self, dtype):
578+
val = np.datetime64("2021-01-18 13:25:00", "ns")
579+
if dtype == "m8[ns]":
580+
val = val - val
581+
return val
582+
583+
@pytest.fixture
584+
def expected(self, scalar):
585+
expected = Series([scalar, scalar, 3], dtype=object)
586+
assert isinstance(expected[0], type(scalar))
587+
return expected
588+
589+
@pytest.fixture
590+
def obj(self):
591+
return Series([1, 2, 3])
592+
593+
@pytest.fixture
594+
def key(self):
595+
return slice(None, -1)
596+
597+
@pytest.fixture(params=[None, list, np.array])
598+
def val(self, scalar, request):
599+
box = request.param
600+
if box is None:
601+
return scalar
602+
return box([scalar, scalar])
603+
604+
@pytest.fixture
605+
def is_inplace(self):
606+
return False
607+
608+
609+
class TestSetitemNAPeriodDtype(SetitemCastingEquivalents):
610+
# Setting compatible NA values into Series with PeriodDtype
611+
612+
@pytest.fixture
613+
def expected(self, key):
614+
exp = Series(period_range("2000-01-01", periods=10, freq="D"))
615+
exp._values.view("i8")[key] = NaT.value
616+
assert exp[key] is NaT or all(x is NaT for x in exp[key])
617+
return exp
618+
619+
@pytest.fixture
620+
def obj(self):
621+
return Series(period_range("2000-01-01", periods=10, freq="D"))
622+
623+
@pytest.fixture(params=[3, slice(3, 5)])
624+
def key(self, request):
625+
return request.param
626+
627+
@pytest.fixture(params=[None, np.nan])
628+
def val(self, request):
629+
return request.param
630+
631+
@pytest.fixture
632+
def is_inplace(self):
633+
return True

0 commit comments

Comments
 (0)