Skip to content

Commit c6bdacb

Browse files
authored
BUG: Series[int8][:3] = range(3) unnecessary upcasting to int64 (#44261)
1 parent 2e29e11 commit c6bdacb

File tree

4 files changed

+100
-1
lines changed

4 files changed

+100
-1
lines changed

doc/source/whatsnew/v1.4.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -537,7 +537,8 @@ Indexing
537537
- Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.datetime64("NaT")`` and ``np.timedelta64("NaT")`` (:issue:`43869`)
538538
- Bug in setting a scalar :class:`Interval` value into a :class:`Series` with ``IntervalDtype`` when the scalar's sides are floats and the values' sides are integers (:issue:`44201`)
539539
- Bug when setting string-backed :class:`Categorical` values that can be parsed to datetimes into a :class:`DatetimeArray` or :class:`Series` or :class:`DataFrame` column backed by :class:`DatetimeArray` failing to parse these strings (:issue:`44236`)
540-
540+
- Bug in :meth:`Series.__setitem__` with an integer dtype other than ``int64`` setting with a ``range`` object unnecessarily upcasting to ``int64`` (:issue:`44261`)
541+
-
541542

542543
Missing
543544
^^^^^^^

pandas/core/dtypes/cast.py

+15
Original file line numberDiff line numberDiff line change
@@ -2197,6 +2197,9 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
21972197
tipo = maybe_infer_dtype_type(element)
21982198

21992199
if dtype.kind in ["i", "u"]:
2200+
if isinstance(element, range):
2201+
return _dtype_can_hold_range(element, dtype)
2202+
22002203
if tipo is not None:
22012204
if tipo.kind not in ["i", "u"]:
22022205
if is_float(element) and element.is_integer():
@@ -2209,6 +2212,7 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
22092212
# i.e. nullable IntegerDtype; we can put this into an ndarray
22102213
# losslessly iff it has no NAs
22112214
return not element._mask.any()
2215+
22122216
return True
22132217

22142218
# We have not inferred an integer from the dtype
@@ -2249,3 +2253,14 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
22492253
return isinstance(element, bytes) and len(element) <= dtype.itemsize
22502254

22512255
raise NotImplementedError(dtype)
2256+
2257+
2258+
def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool:
2259+
"""
2260+
maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints),
2261+
but in many cases a range can be held by a smaller integer dtype.
2262+
Check if this is one of those cases.
2263+
"""
2264+
if not len(rng):
2265+
return True
2266+
return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import numpy as np
2+
3+
from pandas.core.dtypes.cast import can_hold_element
4+
5+
6+
def test_can_hold_element_range(any_int_numpy_dtype):
    """
    Check ``can_hold_element`` with ``range`` values for a NumPy integer dtype.

    GH#44261: a range should be reported as holdable whenever its endpoints
    fit in the dtype, and an empty range should always be holdable no matter
    how large its nominal endpoints are.
    """
    # GH#44261
    dtype = np.dtype(any_int_numpy_dtype)
    arr = np.array([], dtype=dtype)

    rng = range(2, 127)
    assert can_hold_element(arr, rng)

    # negatives -> can't be held by uint dtypes
    rng = range(-2, 127)
    if dtype.kind == "i":
        assert can_hold_element(arr, rng)
    else:
        assert not can_hold_element(arr, rng)

    # upper endpoint exceeds the int8 maximum only
    rng = range(2, 255)
    if dtype == "int8":
        assert not can_hold_element(arr, rng)
    else:
        assert can_hold_element(arr, rng)

    # needs a signed dtype of at least 4 bytes
    rng = range(-255, 65537)
    if dtype.kind == "u":
        assert not can_hold_element(arr, rng)
    elif dtype.itemsize < 4:
        assert not can_hold_element(arr, rng)
    else:
        assert can_hold_element(arr, rng)

    # empty: holdable regardless of the sign or size of the endpoints
    rng = range(-(10 ** 10), -(10 ** 10))
    assert len(rng) == 0
    assert can_hold_element(arr, rng)

    rng = range(10 ** 10, 10 ** 10)
    assert len(rng) == 0
    assert can_hold_element(arr, rng)

pandas/tests/series/indexing/test_setitem.py

+41
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
import numpy as np
77
import pytest
88

9+
from pandas.core.dtypes.common import is_list_like
10+
911
from pandas import (
1012
Categorical,
1113
DataFrame,
@@ -622,6 +624,16 @@ def test_mask_key(self, obj, key, expected, val, indexer_sli):
622624
tm.assert_series_equal(obj, expected)
623625

624626
def test_series_where(self, obj, key, expected, val, is_inplace):
627+
if is_list_like(val) and len(val) < len(obj):
628+
# Series.where is not valid here
629+
if isinstance(val, range):
630+
return
631+
632+
# FIXME: The remaining TestSetitemDT64IntoInt that go through here
633+
# are relying on technically-incorrect behavior because Block.where
634+
# uses np.putmask instead of expressions.where in those cases,
635+
# which has different length-checking semantics.
636+
625637
mask = np.zeros(obj.shape, dtype=bool)
626638
mask[key] = True
627639

@@ -973,6 +985,35 @@ def expected(self, obj, val):
973985
return Series(idx)
974986

975987

988+
class TestSetitemRangeIntoIntegerSeries(SetitemCastingEquivalents):
    # GH#44261 A range whose values are small enough must be settable into
    #  an integer Series of small itemsize without forcing an upcast.

    @pytest.fixture
    def obj(self, any_int_numpy_dtype):
        # Series holding 0..4 in the integer dtype under test
        return Series(range(5), dtype=np.dtype(any_int_numpy_dtype))

    @pytest.fixture
    def val(self):
        # the range being assigned
        return range(2, 4)

    @pytest.fixture
    def key(self):
        # the first two positions are overwritten
        return slice(0, 2)

    @pytest.fixture
    def expected(self, any_int_numpy_dtype):
        # same dtype as ``obj``: positions 0 and 1 now hold 2 and 3
        return Series([2, 3, 2, 3, 4], dtype=np.dtype(any_int_numpy_dtype))

    @pytest.fixture
    def inplace(self):
        # NOTE(review): test_series_where in this file requests ``is_inplace``,
        #  not ``inplace``; confirm this fixture is actually consumed.
        return True
1015+
1016+
9761017
def test_setitem_int_as_positional_fallback_deprecation():
9771018
# GH#42215 deprecated falling back to positional on __setitem__ with an
9781019
# int not contained in the index

0 commit comments

Comments
 (0)