Skip to content

BUG: Series.__setitem__ with mismatched IntervalDtype #39120

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ Indexing
- Bug in :meth:`DataFrame.iloc.__setitem__` and :meth:`DataFrame.loc.__setitem__` with mixed dtypes when setting with a dictionary value (:issue:`38335`)
- Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`)
- Bug in setting ``timedelta64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`)
- Bug in setting :class:`Interval` values into a :class:`Series` or :class:`DataFrame` with mismatched :class:`IntervalDtype` incorrectly casting the new values to the existing dtype (:issue:`39120`)

Missing
^^^^^^^
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,18 @@ def _validate_listlike(self, value, allow_object: bool = False):
# We treat empty list as our own dtype.
return type(self)._from_sequence([], dtype=self.dtype)

if hasattr(value, "dtype") and value.dtype == object:
# `array` below won't do inference if value is an Index or Series.
# so do so here. in the Index case, inferred_type may be cached.
if lib.infer_dtype(value) in self._infer_matches:
try:
value = type(self)._from_sequence(value)
except (ValueError, TypeError):
if allow_object:
return value
msg = self._validation_error_message(value, True)
raise TypeError(msg)

# Do type inference if necessary up front
# e.g. we passed PeriodIndex.values and got an ndarray of Periods
value = array(value)
Expand Down
16 changes: 14 additions & 2 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,12 +955,22 @@ def _validate_listlike(self, value):
# list-like of intervals
try:
array = IntervalArray(value)
# TODO: self._check_closed_matches(array, name="value")
self._check_closed_matches(array, name="value")
value_left, value_right = array.left, array.right
except TypeError as err:
# wrong type: not interval or NA
msg = f"'value' should be an interval type, got {type(value)} instead."
raise TypeError(msg) from err

try:
self.left._validate_fill_value(value_left)
except (ValueError, TypeError) as err:
msg = (
"'value' should be a compatible interval type, "
f"got {type(value)} instead."
)
raise TypeError(msg) from err

return value_left, value_right

def _validate_scalar(self, value):
Expand Down Expand Up @@ -995,10 +1005,12 @@ def _validate_setitem_value(self, value):
value = np.timedelta64("NaT")
value_left, value_right = value, value

elif is_interval_dtype(value) or isinstance(value, Interval):
elif isinstance(value, Interval):
# scalar interval
self._check_closed_matches(value, name="value")
value_left, value_right = value.left, value.right
self.left._validate_fill_value(value_left)
self.left._validate_fill_value(value_right)

else:
return self._validate_listlike(value)
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,10 @@ def _validate_fill_value(self, value):
raise TypeError
value = int(value)

elif hasattr(value, "dtype") and value.dtype.kind in ["m", "M"]:
# TODO: if we're checking arraylike here, do so systematically
raise TypeError

return value

def _convert_tolerance(self, tolerance, target):
Expand Down
45 changes: 21 additions & 24 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import inspect
import re
from typing import TYPE_CHECKING, Any, List, Optional, Type, Union, cast
from typing import TYPE_CHECKING, Any, Callable, List, Optional, Type, Union, cast

import numpy as np

Expand Down Expand Up @@ -28,7 +28,6 @@
infer_dtype_from_scalar,
maybe_downcast_numeric,
maybe_downcast_to_dtype,
maybe_infer_dtype_type,
maybe_promote,
maybe_upcast,
soft_convert_objects,
Expand All @@ -51,7 +50,7 @@
)
from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex, ABCPandasArray, ABCSeries
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
from pandas.core.dtypes.missing import isna

import pandas.core.algorithms as algos
from pandas.core.array_algos.putmask import (
Expand Down Expand Up @@ -1879,7 +1878,24 @@ def _unstack(self, unstacker, fill_value, new_placement):
return blocks, mask


class ObjectValuesExtensionBlock(ExtensionBlock):
class HybridMixin:
"""
Mixin for Blocks backed (maybe indirectly) by ExtensionArrays.
"""

array_values: Callable

def _can_hold_element(self, element: Any) -> bool:
values = self.array_values()

try:
values._validate_setitem_value(element)
return True
except (ValueError, TypeError):
return False


class ObjectValuesExtensionBlock(HybridMixin, ExtensionBlock):
"""
Block providing backwards-compatibility for `.values`.

Expand All @@ -1890,16 +1906,6 @@ class ObjectValuesExtensionBlock(ExtensionBlock):
def external_values(self):
return self.values.astype(object)

def _can_hold_element(self, element: Any) -> bool:
if is_valid_nat_for_dtype(element, self.dtype):
return True
if isinstance(element, list) and len(element) == 0:
return True
tipo = maybe_infer_dtype_type(element)
if tipo is not None:
return issubclass(tipo.type, self.dtype.type)
return isinstance(element, self.dtype.type)


class NumericBlock(Block):
__slots__ = ()
Expand Down Expand Up @@ -1959,7 +1965,7 @@ class IntBlock(NumericBlock):
_can_hold_na = False


class DatetimeLikeBlockMixin(Block):
class DatetimeLikeBlockMixin(HybridMixin, Block):
"""Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock."""

_can_hold_na = True
Expand Down Expand Up @@ -2042,15 +2048,6 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List["Block"]:
nb = self.make_block_same_class(res_values)
return [nb]

def _can_hold_element(self, element: Any) -> bool:
arr = self.array_values()

try:
arr._validate_setitem_value(element)
return True
except (TypeError, ValueError):
return False


class DatetimeBlock(DatetimeLikeBlockMixin):
__slots__ = ()
Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/arrays/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,31 @@ def test_set_na(self, left_right_dtypes):

tm.assert_extension_array_equal(result, expected)

def test_setitem_mismatched_closed(self):
arr = IntervalArray.from_breaks(range(4))
orig = arr.copy()
other = arr.set_closed("both")

msg = "'value.closed' is 'both', expected 'right'"
with pytest.raises(ValueError, match=msg):
arr[0] = other[0]
with pytest.raises(ValueError, match=msg):
arr[:1] = other[:1]
with pytest.raises(ValueError, match=msg):
arr[:0] = other[:0]
with pytest.raises(ValueError, match=msg):
arr[:] = other[::-1]
with pytest.raises(ValueError, match=msg):
arr[:] = list(other[::-1])
with pytest.raises(ValueError, match=msg):
arr[:] = other[::-1].astype(object)
with pytest.raises(ValueError, match=msg):
arr[:] = other[::-1].astype("category")

# empty list should be no-op
arr[:0] = []
tm.assert_interval_array_equal(arr, orig)


def test_repr():
# GH 25022
Expand Down
37 changes: 33 additions & 4 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@
from pandas.compat.numpy import np_version_under1p18

import pandas as pd
from pandas import DatetimeIndex, Index, Period, PeriodIndex, TimedeltaIndex
import pandas._testing as tm
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.period import Period, PeriodIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex
from pandas.core.arrays import DatetimeArray, PandasArray, PeriodArray, TimedeltaArray


# TODO: more freq variants
Expand Down Expand Up @@ -402,6 +400,37 @@ def test_setitem(self):
expected[:2] = expected[-2:]
tm.assert_numpy_array_equal(arr.asi8, expected)

@pytest.mark.parametrize(
"box",
[
Index,
pd.Series,
np.array,
list,
PandasArray,
],
)
def test_setitem_object_dtype(self, box, arr1d):

expected = arr1d.copy()[::-1]
if expected.dtype.kind in ["m", "M"]:
expected = expected._with_freq(None)

vals = expected
if box is list:
vals = list(vals)
elif box is np.array:
# if we do np.array(x).astype(object) then dt64 and td64 cast to ints
vals = np.array(vals.astype(object))
elif box is PandasArray:
vals = box(np.asarray(vals, dtype=object))
else:
vals = box(vals).astype(object)

arr1d[:] = vals

tm.assert_equal(arr1d, expected)

def test_setitem_strs(self, arr1d, request):
# Check that we parse strs in both scalar and listlike
if isinstance(arr1d, DatetimeArray):
Expand Down
Loading