Skip to content

Commit d3dc5c1

Browse files
jbrockmendel authored and luckyvs1 committed
BUG: Series.__setitem__ with mismatched IntervalDtype (pandas-dev#39120)
1 parent 2a4eae6 commit d3dc5c1

File tree

8 files changed

+249
-31
lines changed

8 files changed

+249
-31
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -249,6 +249,7 @@ Indexing
249249
- Bug in :meth:`DataFrame.iloc.__setitem__` and :meth:`DataFrame.loc.__setitem__` with mixed dtypes when setting with a dictionary value (:issue:`38335`)
250250
- Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`)
251251
- Bug in setting ``timedelta64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`)
252+
- Bug in setting :class:`Interval` values into a :class:`Series` or :class:`DataFrame` with mismatched :class:`IntervalDtype` incorrectly casting the new values to the existing dtype (:issue:`39120`)
252253

253254
Missing
254255
^^^^^^^

pandas/core/arrays/datetimelike.py

+12
Original file line number | Diff line number | Diff line change
@@ -613,6 +613,18 @@ def _validate_listlike(self, value, allow_object: bool = False):
613613
# We treat empty list as our own dtype.
614614
return type(self)._from_sequence([], dtype=self.dtype)
615615

616+
if hasattr(value, "dtype") and value.dtype == object:
617+
# `array` below won't do inference if value is an Index or Series.
618+
# so do so here. in the Index case, inferred_type may be cached.
619+
if lib.infer_dtype(value) in self._infer_matches:
620+
try:
621+
value = type(self)._from_sequence(value)
622+
except (ValueError, TypeError):
623+
if allow_object:
624+
return value
625+
msg = self._validation_error_message(value, True)
626+
raise TypeError(msg)
627+
616628
# Do type inference if necessary up front
617629
# e.g. we passed PeriodIndex.values and got an ndarray of Periods
618630
value = array(value)

pandas/core/arrays/interval.py

+14-2
Original file line number | Diff line number | Diff line change
@@ -955,12 +955,22 @@ def _validate_listlike(self, value):
955955
# list-like of intervals
956956
try:
957957
array = IntervalArray(value)
958-
# TODO: self._check_closed_matches(array, name="value")
958+
self._check_closed_matches(array, name="value")
959959
value_left, value_right = array.left, array.right
960960
except TypeError as err:
961961
# wrong type: not interval or NA
962962
msg = f"'value' should be an interval type, got {type(value)} instead."
963963
raise TypeError(msg) from err
964+
965+
try:
966+
self.left._validate_fill_value(value_left)
967+
except (ValueError, TypeError) as err:
968+
msg = (
969+
"'value' should be a compatible interval type, "
970+
f"got {type(value)} instead."
971+
)
972+
raise TypeError(msg) from err
973+
964974
return value_left, value_right
965975

966976
def _validate_scalar(self, value):
@@ -995,10 +1005,12 @@ def _validate_setitem_value(self, value):
9951005
value = np.timedelta64("NaT")
9961006
value_left, value_right = value, value
9971007

998-
elif is_interval_dtype(value) or isinstance(value, Interval):
1008+
elif isinstance(value, Interval):
9991009
# scalar interval
10001010
self._check_closed_matches(value, name="value")
10011011
value_left, value_right = value.left, value.right
1012+
self.left._validate_fill_value(value_left)
1013+
self.left._validate_fill_value(value_right)
10021014

10031015
else:
10041016
return self._validate_listlike(value)

pandas/core/indexes/numeric.py

+4
Original file line number | Diff line number | Diff line change
@@ -152,6 +152,10 @@ def _validate_fill_value(self, value):
152152
raise TypeError
153153
value = int(value)
154154

155+
elif hasattr(value, "dtype") and value.dtype.kind in ["m", "M"]:
156+
# TODO: if we're checking arraylike here, do so systematically
157+
raise TypeError
158+
155159
return value
156160

157161
def _convert_tolerance(self, tolerance, target):

pandas/core/internals/blocks.py

+21-24
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import inspect
22
import re
3-
from typing import TYPE_CHECKING, Any, List, Optional, Type, Union, cast
3+
from typing import TYPE_CHECKING, Any, Callable, List, Optional, Type, Union, cast
44

55
import numpy as np
66

@@ -28,7 +28,6 @@
2828
infer_dtype_from_scalar,
2929
maybe_downcast_numeric,
3030
maybe_downcast_to_dtype,
31-
maybe_infer_dtype_type,
3231
maybe_promote,
3332
maybe_upcast,
3433
soft_convert_objects,
@@ -51,7 +50,7 @@
5150
)
5251
from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype
5352
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex, ABCPandasArray, ABCSeries
54-
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
53+
from pandas.core.dtypes.missing import isna
5554

5655
import pandas.core.algorithms as algos
5756
from pandas.core.array_algos.putmask import (
@@ -1879,7 +1878,24 @@ def _unstack(self, unstacker, fill_value, new_placement):
18791878
return blocks, mask
18801879

18811880

1882-
class ObjectValuesExtensionBlock(ExtensionBlock):
1881+
class HybridMixin:
1882+
"""
1883+
Mixin for Blocks backed (maybe indirectly) by ExtensionArrays.
1884+
"""
1885+
1886+
array_values: Callable
1887+
1888+
def _can_hold_element(self, element: Any) -> bool:
1889+
values = self.array_values()
1890+
1891+
try:
1892+
values._validate_setitem_value(element)
1893+
return True
1894+
except (ValueError, TypeError):
1895+
return False
1896+
1897+
1898+
class ObjectValuesExtensionBlock(HybridMixin, ExtensionBlock):
18831899
"""
18841900
Block providing backwards-compatibility for `.values`.
18851901
@@ -1890,16 +1906,6 @@ class ObjectValuesExtensionBlock(ExtensionBlock):
18901906
def external_values(self):
18911907
return self.values.astype(object)
18921908

1893-
def _can_hold_element(self, element: Any) -> bool:
1894-
if is_valid_nat_for_dtype(element, self.dtype):
1895-
return True
1896-
if isinstance(element, list) and len(element) == 0:
1897-
return True
1898-
tipo = maybe_infer_dtype_type(element)
1899-
if tipo is not None:
1900-
return issubclass(tipo.type, self.dtype.type)
1901-
return isinstance(element, self.dtype.type)
1902-
19031909

19041910
class NumericBlock(Block):
19051911
__slots__ = ()
@@ -1959,7 +1965,7 @@ class IntBlock(NumericBlock):
19591965
_can_hold_na = False
19601966

19611967

1962-
class DatetimeLikeBlockMixin(Block):
1968+
class DatetimeLikeBlockMixin(HybridMixin, Block):
19631969
"""Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock."""
19641970

19651971
_can_hold_na = True
@@ -2042,15 +2048,6 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List["Block"]:
20422048
nb = self.make_block_same_class(res_values)
20432049
return [nb]
20442050

2045-
def _can_hold_element(self, element: Any) -> bool:
2046-
arr = self.array_values()
2047-
2048-
try:
2049-
arr._validate_setitem_value(element)
2050-
return True
2051-
except (TypeError, ValueError):
2052-
return False
2053-
20542051

20552052
class DatetimeBlock(DatetimeLikeBlockMixin):
20562053
__slots__ = ()

pandas/tests/arrays/interval/test_interval.py

+25
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,31 @@ def test_set_na(self, left_right_dtypes):
123123

124124
tm.assert_extension_array_equal(result, expected)
125125

126+
def test_setitem_mismatched_closed(self):
127+
arr = IntervalArray.from_breaks(range(4))
128+
orig = arr.copy()
129+
other = arr.set_closed("both")
130+
131+
msg = "'value.closed' is 'both', expected 'right'"
132+
with pytest.raises(ValueError, match=msg):
133+
arr[0] = other[0]
134+
with pytest.raises(ValueError, match=msg):
135+
arr[:1] = other[:1]
136+
with pytest.raises(ValueError, match=msg):
137+
arr[:0] = other[:0]
138+
with pytest.raises(ValueError, match=msg):
139+
arr[:] = other[::-1]
140+
with pytest.raises(ValueError, match=msg):
141+
arr[:] = list(other[::-1])
142+
with pytest.raises(ValueError, match=msg):
143+
arr[:] = other[::-1].astype(object)
144+
with pytest.raises(ValueError, match=msg):
145+
arr[:] = other[::-1].astype("category")
146+
147+
# empty list should be no-op
148+
arr[:0] = []
149+
tm.assert_interval_array_equal(arr, orig)
150+
126151

127152
def test_repr():
128153
# GH 25022

pandas/tests/arrays/test_datetimelike.py

+33-4
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,9 @@
88
from pandas.compat.numpy import np_version_under1p18
99

1010
import pandas as pd
11+
from pandas import DatetimeIndex, Index, Period, PeriodIndex, TimedeltaIndex
1112
import pandas._testing as tm
12-
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
13-
from pandas.core.indexes.datetimes import DatetimeIndex
14-
from pandas.core.indexes.period import Period, PeriodIndex
15-
from pandas.core.indexes.timedeltas import TimedeltaIndex
13+
from pandas.core.arrays import DatetimeArray, PandasArray, PeriodArray, TimedeltaArray
1614

1715

1816
# TODO: more freq variants
@@ -402,6 +400,37 @@ def test_setitem(self):
402400
expected[:2] = expected[-2:]
403401
tm.assert_numpy_array_equal(arr.asi8, expected)
404402

403+
@pytest.mark.parametrize(
404+
"box",
405+
[
406+
Index,
407+
pd.Series,
408+
np.array,
409+
list,
410+
PandasArray,
411+
],
412+
)
413+
def test_setitem_object_dtype(self, box, arr1d):
414+
415+
expected = arr1d.copy()[::-1]
416+
if expected.dtype.kind in ["m", "M"]:
417+
expected = expected._with_freq(None)
418+
419+
vals = expected
420+
if box is list:
421+
vals = list(vals)
422+
elif box is np.array:
423+
# if we do np.array(x).astype(object) then dt64 and td64 cast to ints
424+
vals = np.array(vals.astype(object))
425+
elif box is PandasArray:
426+
vals = box(np.asarray(vals, dtype=object))
427+
else:
428+
vals = box(vals).astype(object)
429+
430+
arr1d[:] = vals
431+
432+
tm.assert_equal(arr1d, expected)
433+
405434
def test_setitem_strs(self, arr1d, request):
406435
# Check that we parse strs in both scalar and listlike
407436
if isinstance(arr1d, DatetimeArray):

0 commit comments

Comments
 (0)