Skip to content

Commit facdb89

Browse files
authored
BUG: setting td64 value into numeric Series incorrectly casting to int (#39488)
1 parent 1ffedc2 commit facdb89

File tree

3 files changed

+152
-29
lines changed

3 files changed

+152
-29
lines changed

pandas/core/indexes/base.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -4326,7 +4326,15 @@ def where(self, cond, other=None):
43264326
except (ValueError, TypeError):
43274327
return self.astype(object).where(cond, other)
43284328

4329-
values = np.where(cond, values, other)
4329+
if isinstance(other, np.timedelta64) and self.dtype == object:
4330+
# https://github.com/numpy/numpy/issues/12550
4331+
# timedelta64 will incorrectly cast to int
4332+
other = [other] * (~cond).sum()
4333+
values = cast(np.ndarray, values).copy()
4334+
# error: Unsupported target for indexed assignment ("ArrayLike")
4335+
values[~cond] = other # type:ignore[index]
4336+
else:
4337+
values = np.where(cond, values, other)
43304338

43314339
return Index(values, name=self.name)
43324340

pandas/core/internals/blocks.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
infer_dtype_from,
3030
maybe_downcast_numeric,
3131
maybe_downcast_to_dtype,
32-
maybe_promote,
3332
maybe_upcast,
3433
soft_convert_objects,
3534
)
@@ -1032,6 +1031,12 @@ def putmask(self, mask, new) -> List[Block]:
10321031
elif not mask.any():
10331032
return [self]
10341033

1034+
elif isinstance(new, np.timedelta64):
1035+
# using putmask with object dtype will incorrectly cast to object
1036+
# Having excluded self._can_hold_element, we know we cannot operate
1037+
# in-place, so we are safe using `where`
1038+
return self.where(new, ~mask)
1039+
10351040
else:
10361041
# may need to upcast
10371042
if transpose:
@@ -1053,7 +1058,7 @@ def f(mask, val, idx):
10531058
n = np.array(new)
10541059

10551060
# type of the new block
1056-
dtype, _ = maybe_promote(n.dtype)
1061+
dtype = find_common_type([n.dtype, val.dtype])
10571062

10581063
# we need to explicitly astype here to make a copy
10591064
n = n.astype(dtype)
@@ -1312,12 +1317,18 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:
13121317
blocks = block.where(orig_other, cond, errors=errors, axis=axis)
13131318
return self._maybe_downcast(blocks, "infer")
13141319

1315-
# convert datetime to datetime64, timedelta to timedelta64
1316-
other = convert_scalar_for_putitemlike(other, values.dtype)
1320+
elif isinstance(other, np.timedelta64):
1321+
# expressions.where will cast np.timedelta64 to int
1322+
result = self.values.copy()
1323+
result[~cond] = [other] * (~cond).sum()
1324+
1325+
else:
1326+
# convert datetime to datetime64, timedelta to timedelta64
1327+
other = convert_scalar_for_putitemlike(other, values.dtype)
13171328

1318-
# By the time we get here, we should have all Series/Index
1319-
# args extracted to ndarray
1320-
result = expressions.where(cond, values, other)
1329+
# By the time we get here, we should have all Series/Index
1330+
# args extracted to ndarray
1331+
result = expressions.where(cond, values, other)
13211332

13221333
if self._can_hold_na or self.ndim == 1:
13231334

pandas/tests/series/indexing/test_setitem.py

+125-21
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
from pandas.compat import np_version_under1p20
7+
68
from pandas import (
79
DatetimeIndex,
810
Index,
@@ -516,25 +518,127 @@ def test_setitem_slice_into_readonly_backing_data():
516518
assert not array.any()
517519

518520

519-
@pytest.mark.parametrize(
520-
"key", [0, slice(0, 1), [0], np.array([0]), range(1)], ids=type
521-
)
522-
@pytest.mark.parametrize("dtype", [complex, int, float])
523-
def test_setitem_td64_into_complex(key, dtype, indexer_sli):
524-
# timedelta64 should not be treated as integers
525-
arr = np.arange(5).astype(dtype)
526-
ser = Series(arr)
527-
td = np.timedelta64(4, "ns")
528-
529-
indexer_sli(ser)[key] = td
530-
assert ser.dtype == object
531-
assert arr[0] == 0 # original array is unchanged
532-
533-
if not isinstance(key, int) and not (
534-
indexer_sli is tm.loc and isinstance(key, slice)
535-
):
536-
# skip key/indexer_sli combinations that will have mismatched lengths
521+
class TestSetitemCastingEquivalentsTimedelta64IntoNumeric:
522+
# timedelta64 should not be treated as integers when setting into
523+
# numeric Series
524+
525+
@pytest.fixture
526+
def val(self):
527+
td = np.timedelta64(4, "ns")
528+
return td
529+
return np.full((1,), td)
530+
531+
@pytest.fixture(params=[complex, int, float])
532+
def dtype(self, request):
533+
return request.param
534+
535+
@pytest.fixture
536+
def obj(self, dtype):
537+
arr = np.arange(5).astype(dtype)
538+
ser = Series(arr)
539+
return ser
540+
541+
@pytest.fixture
542+
def expected(self, dtype):
543+
arr = np.arange(5).astype(dtype)
537544
ser = Series(arr)
538-
indexer_sli(ser)[key] = np.full((1,), td)
539-
assert ser.dtype == object
540-
assert arr[0] == 0 # original array is unchanged
545+
ser = ser.astype(object)
546+
ser.values[0] = np.timedelta64(4, "ns")
547+
return ser
548+
549+
@pytest.fixture
550+
def key(self):
551+
return 0
552+
553+
def check_indexer(self, obj, key, expected, val, indexer):
554+
orig = obj
555+
obj = obj.copy()
556+
arr = obj._values
557+
558+
indexer(obj)[key] = val
559+
tm.assert_series_equal(obj, expected)
560+
561+
tm.assert_equal(arr, orig._values) # original array is unchanged
562+
563+
def test_int_key(self, obj, key, expected, val, indexer_sli):
564+
if not isinstance(key, int):
565+
return
566+
567+
self.check_indexer(obj, key, expected, val, indexer_sli)
568+
569+
rng = range(key, key + 1)
570+
self.check_indexer(obj, rng, expected, val, indexer_sli)
571+
572+
if indexer_sli is not tm.loc:
573+
# Note: no .loc because that handles slice edges differently
574+
slc = slice(key, key + 1)
575+
self.check_indexer(obj, slc, expected, val, indexer_sli)
576+
577+
ilkey = [key]
578+
self.check_indexer(obj, ilkey, expected, val, indexer_sli)
579+
580+
indkey = np.array(ilkey)
581+
self.check_indexer(obj, indkey, expected, val, indexer_sli)
582+
583+
def test_slice_key(self, obj, key, expected, val, indexer_sli):
584+
if not isinstance(key, slice):
585+
return
586+
587+
if indexer_sli is not tm.loc:
588+
# Note: no .loc because that handles slice edges differently
589+
self.check_indexer(obj, key, expected, val, indexer_sli)
590+
591+
ilkey = list(range(len(obj)))[key]
592+
self.check_indexer(obj, ilkey, expected, val, indexer_sli)
593+
594+
indkey = np.array(ilkey)
595+
self.check_indexer(obj, indkey, expected, val, indexer_sli)
596+
597+
def test_mask_key(self, obj, key, expected, val, indexer_sli):
598+
# setitem with boolean mask
599+
mask = np.zeros(obj.shape, dtype=bool)
600+
mask[key] = True
601+
602+
self.check_indexer(obj, mask, expected, val, indexer_sli)
603+
604+
def test_series_where(self, obj, key, expected, val):
605+
mask = np.zeros(obj.shape, dtype=bool)
606+
mask[key] = True
607+
608+
orig = obj
609+
obj = obj.copy()
610+
arr = obj._values
611+
res = obj.where(~mask, val)
612+
tm.assert_series_equal(res, expected)
613+
614+
tm.assert_equal(arr, orig._values) # original array is unchanged
615+
616+
def test_index_where(self, obj, key, expected, val, request):
617+
if Index(obj).dtype != obj.dtype:
618+
pytest.skip("test not applicable for this dtype")
619+
620+
mask = np.zeros(obj.shape, dtype=bool)
621+
mask[key] = True
622+
623+
if obj.dtype == bool and not mask.all():
624+
# When mask is all True, casting behavior does not apply
625+
msg = "Index/Series casting behavior inconsistent GH#38692"
626+
mark = pytest.mark.xfail(reason=msg)
627+
request.node.add_marker(mark)
628+
629+
res = Index(obj).where(~mask, val)
630+
tm.assert_index_equal(res, Index(expected))
631+
632+
@pytest.mark.xfail(
633+
np_version_under1p20,
634+
reason="Index/Series casting behavior inconsistent GH#38692",
635+
)
636+
def test_index_putmask(self, obj, key, expected, val):
637+
if Index(obj).dtype != obj.dtype:
638+
pytest.skip("test not applicable for this dtype")
639+
640+
mask = np.zeros(obj.shape, dtype=bool)
641+
mask[key] = True
642+
643+
res = Index(obj).putmask(mask, val)
644+
tm.assert_index_equal(res, Index(expected))

0 commit comments

Comments
 (0)