Skip to content

BUG: Series[int].__setitem__(mask, td64_or_dt64) incorrect casting #39619

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ Indexing
- Bug in :meth:`Series.__setitem__` raising ``ValueError`` when setting a :class:`Series` with a scalar indexer (:issue:`38303`)
- Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`)
- Bug in :meth:`DataFrame.__getitem__` and :meth:`Series.__getitem__` always raising ``KeyError`` when slicing with existing strings an :class:`Index` with milliseconds (:issue:`33589`)
- Bug in setting ``timedelta64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`)
- Bug in setting ``timedelta64`` or ``datetime64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`, issue:`39619`)
- Bug in setting :class:`Interval` values into a :class:`Series` or :class:`DataFrame` with mismatched :class:`IntervalDtype` incorrectly casting the new values to the existing dtype (:issue:`39120`)
- Bug in setting ``datetime64`` values into a :class:`Series` with integer-dtype incorrect casting the datetime64 values to integers (:issue:`39266`)
- Bug in :meth:`Index.get_loc` not raising ``KeyError`` when method is specified for ``NaN`` value when ``NaN`` is not in :class:`Index` (:issue:`39382`)
Expand Down
31 changes: 5 additions & 26 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8891,32 +8891,11 @@ def _where(
if isinstance(other, (np.ndarray, ExtensionArray)):

if other.shape != self.shape:

if self.ndim == 1:

icond = cond._values

# GH 2745 / GH 4192
# treat like a scalar
if len(other) == 1:
other = other[0]

# GH 3235
# match True cond to other
elif len(cond[icond]) == len(other):

# try to not change dtype at first
new_other = self._values
new_other = new_other.copy()
new_other[icond] = other
other = new_other

else:
raise ValueError(
"Length of replacements must equal series length"
)

else:
if self.ndim != 1:
# In the ndim == 1 case we may have
# other length 1, which we treat as scalar (GH#2745, GH#4192)
# or len(other) == icond.sum(), which we treat like
# __setitem__ (GH#3235)
raise ValueError(
"other must be the same shape as self when an ndarray"
)
Expand Down
11 changes: 8 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4550,11 +4550,16 @@ def putmask(self, mask, value):
return self.astype(dtype).putmask(mask, value)

values = self._values.copy()
if isinstance(converted, np.timedelta64) and self.dtype == object:
dtype, _ = infer_dtype_from(converted, pandas_dtype=True)
if dtype.kind in ["m", "M"]:
# https://github.com/numpy/numpy/issues/12550
# timedelta64 will incorrectly cast to int
converted = [converted] * mask.sum()
values[mask] = converted
if not is_list_like(converted):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this pattern is pretty common, maybe a helper function?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yah one more PR coming up that uses this pattern yet again; i think its going to end up in array_algos.putmask,

converted = [converted] * mask.sum()
values[mask] = converted
else:
converted = list(converted)
np.putmask(values, mask, converted)
else:
np.putmask(values, mask, converted)

Expand Down
16 changes: 11 additions & 5 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1031,7 +1031,8 @@ def putmask(self, mask, new) -> List[Block]:
elif not mask.any():
return [self]

elif isinstance(new, np.timedelta64):
dtype, _ = infer_dtype_from(new)
if dtype.kind in ["m", "M"]:
# using putmask with object dtype will incorrect cast to object
# Having excluded self._can_hold_element, we know we cannot operate
# in-place, so we are safe using `where`
Expand Down Expand Up @@ -1317,10 +1318,15 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:
blocks = block.where(orig_other, cond, errors=errors, axis=axis)
return self._maybe_downcast(blocks, "infer")

elif isinstance(other, np.timedelta64):
# expressions.where will cast np.timedelta64 to int
result = self.values.copy()
result[~cond] = [other] * (~cond).sum()
dtype, _ = infer_dtype_from(other, pandas_dtype=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

e.g here as well

if dtype.kind in ["m", "M"] and dtype.kind != values.dtype.kind:
# expressions.where would cast np.timedelta64 to int
if not is_list_like(other):
other = [other] * (~cond).sum()
else:
other = list(other)
result = values.copy()
np.putmask(result, ~cond, other)

else:
# convert datetime to datetime64, timedelta to timedelta64
Expand Down
102 changes: 67 additions & 35 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,18 +74,6 @@ def test_setitem_tuple_with_datetimetz_values(self):
tm.assert_series_equal(result, expected)


class TestSetitemPeriodDtype:
@pytest.mark.parametrize("na_val", [None, np.nan])
def test_setitem_na_period_dtype_casts_to_nat(self, na_val):
ser = Series(period_range("2000-01-01", periods=10, freq="D"))

ser[3] = na_val
assert ser[3] is NaT

ser[3:5] = na_val
assert ser[4] is NaT


class TestSetitemScalarIndexer:
def test_setitem_negative_out_of_bounds(self):
ser = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10))
Expand Down Expand Up @@ -259,29 +247,6 @@ def test_setitem_callable_other(self):


class TestSetitemCasting:
@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
def test_setitem_dt64_into_int_series(self, dtype):
# dont cast dt64 to int when doing this setitem
orig = Series([1, 2, 3])

val = np.datetime64("2021-01-18 13:25:00", "ns")
if dtype == "m8[ns]":
val = val - val

ser = orig.copy()
ser[:-1] = val
expected = Series([val, val, 3], dtype=object)
tm.assert_series_equal(ser, expected)
assert isinstance(ser[0], type(val))

ser = orig.copy()
ser[:-1] = [val, val]
tm.assert_series_equal(ser, expected)

ser = orig.copy()
ser[:-1] = np.array([val, val])
tm.assert_series_equal(ser, expected)

@pytest.mark.parametrize("unique", [True, False])
@pytest.mark.parametrize("val", [3, 3.0, "3"], ids=type)
def test_setitem_non_bool_into_bool(self, val, indexer_sli, unique):
Expand Down Expand Up @@ -599,3 +564,70 @@ def is_inplace(self):
Indicate we do _not_ expect the setting to be done inplace.
"""
return False


class TestSetitemDT64IntoInt(SetitemCastingEquivalents):
# GH#39619 dont cast dt64 to int when doing this setitem

@pytest.fixture(params=["M8[ns]", "m8[ns]"])
def dtype(self, request):
return request.param

@pytest.fixture
def scalar(self, dtype):
val = np.datetime64("2021-01-18 13:25:00", "ns")
if dtype == "m8[ns]":
val = val - val
return val

@pytest.fixture
def expected(self, scalar):
expected = Series([scalar, scalar, 3], dtype=object)
assert isinstance(expected[0], type(scalar))
return expected

@pytest.fixture
def obj(self):
return Series([1, 2, 3])

@pytest.fixture
def key(self):
return slice(None, -1)

@pytest.fixture(params=[None, list, np.array])
def val(self, scalar, request):
box = request.param
if box is None:
return scalar
return box([scalar, scalar])

@pytest.fixture
def is_inplace(self):
return False


class TestSetitemNAPeriodDtype(SetitemCastingEquivalents):
# Setting compatible NA values into Series with PeriodDtype

@pytest.fixture
def expected(self, key):
exp = Series(period_range("2000-01-01", periods=10, freq="D"))
exp._values.view("i8")[key] = NaT.value
assert exp[key] is NaT or all(x is NaT for x in exp[key])
return exp

@pytest.fixture
def obj(self):
return Series(period_range("2000-01-01", periods=10, freq="D"))

@pytest.fixture(params=[3, slice(3, 5)])
def key(self, request):
return request.param

@pytest.fixture(params=[None, np.nan])
def val(self, request):
return request.param

@pytest.fixture
def is_inplace(self):
return True