diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 4e73ea348dfde..da42b2c228ec6 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -259,6 +259,7 @@ Indexing ^^^^^^^^ - Bug in :meth:`loc.__getitem__` with a list of keys causing an internal inconsistency that could lead to a disconnect between ``frame.at[x, y]`` vs ``frame[y].loc[x]`` (:issue:`22372`) - Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`) +- Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`) - Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised insead of casting to a common dtype (:issue:`45070`) - Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`) - Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 9ceded00bf159..33732bcaca733 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -18,10 +18,7 @@ from pandas._config import get_option -from pandas._libs import ( - NaT, - lib, -) +from pandas._libs import lib from pandas._libs.interval import ( VALID_CLOSED, Interval, @@ -44,8 +41,6 @@ from pandas.core.dtypes.common import ( is_categorical_dtype, - is_datetime64_dtype, - is_datetime64tz_dtype, is_dtype_equal, is_float_dtype, is_integer_dtype, @@ -54,7 +49,6 @@ is_object_dtype, is_scalar, is_string_dtype, - is_timedelta64_dtype, needs_i8_conversion, pandas_dtype, ) @@ -1103,7 +1097,7 @@ def _validate_scalar(self, value): # TODO: check subdtype match like _validate_setitem_value? elif is_valid_na_for_dtype(value, self.left.dtype): # GH#18295 - left = right = value + left = right = self.left._na_value else: raise TypeError( "can only insert Interval objects and NA into an IntervalArray" @@ -1111,22 +1105,15 @@ def _validate_scalar(self, value): return left, right def _validate_setitem_value(self, value): - needs_float_conversion = False if is_valid_na_for_dtype(value, self.left.dtype): # na value: need special casing to set directly on numpy arrays + value = self.left._na_value if is_integer_dtype(self.dtype.subtype): # can't set NaN on a numpy integer array - needs_float_conversion = True - elif is_datetime64_dtype(self.dtype.subtype): - # need proper NaT to set directly on the numpy array - value = np.datetime64("NaT") - elif is_datetime64tz_dtype(self.dtype.subtype): - # need proper NaT to set directly on the DatetimeArray array - value = NaT - elif is_timedelta64_dtype(self.dtype.subtype): - # need proper NaT to set directly on the numpy array - value = np.timedelta64("NaT") + # GH#45484 TypeError, not ValueError, matches what we get with + # non-NA un-holdable value. + raise TypeError("Cannot set float NaN to integer-backed IntervalArray") value_left, value_right = value, value elif isinstance(value, Interval): @@ -1139,10 +1126,6 @@ def _validate_setitem_value(self, value): else: return self._validate_listlike(value) - if needs_float_conversion: - # GH#45484 TypeError, not ValueError, matches what we get with - # non-NA un-holdable value. - raise TypeError("Cannot set float NaN to integer-backed IntervalArray") return value_left, value_right def value_counts(self, dropna: bool = True): diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 92f3cfdc589ff..fe5b464a5a18d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -470,8 +470,13 @@ def ensure_dtype_can_hold_na(dtype: DtypeObj) -> DtypeObj: If we have a dtype that cannot hold NA values, find the best match that can. """ if isinstance(dtype, ExtensionDtype): - # TODO: ExtensionDtype.can_hold_na? - return dtype + if dtype._can_hold_na: + return dtype + elif isinstance(dtype, IntervalDtype): + # TODO(GH#45349): don't special-case IntervalDtype, allow + # overriding instead of returning object below. + return IntervalDtype(np.float64, closed=dtype.closed) + return _dtype_obj elif dtype.kind == "b": return _dtype_obj elif dtype.kind in ["i", "u"]: @@ -1470,6 +1475,10 @@ def find_result_type(left: ArrayLike, right: Any) -> DtypeObj: new_dtype = np.result_type(left, right) + elif is_valid_na_for_dtype(right, left.dtype): + # e.g. IntervalDtype[int] and None/np.nan + new_dtype = ensure_dtype_can_hold_na(left.dtype) + else: dtype, _ = infer_dtype_from(right, pandas_dtype=True) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index e74d73b84e94b..9a6d4748380a6 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1117,6 +1117,18 @@ def __new__(cls, subtype=None, closed: str_type | None = None): cls._cache_dtypes[key] = u return u + @cache_readonly + def _can_hold_na(self) -> bool: + subtype = self._subtype + if subtype is None: + # partially-initialized + raise NotImplementedError( + "_can_hold_na is not defined for partially-initialized IntervalDtype" + ) + if subtype.kind in ["i", "u"]: + return False + return True + @property def closed(self): return self._closed diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 26ddf89adaea2..ab2413dd06a45 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -70,6 +70,7 @@ from pandas.core.dtypes.cast import ( can_hold_element, common_dtype_categorical_compat, + ensure_dtype_can_hold_na, find_common_type, infer_dtype_from, maybe_cast_pointwise_result, @@ -178,7 +179,6 @@ from pandas import ( CategoricalIndex, DataFrame, - IntervalIndex, MultiIndex, Series, ) @@ -6087,10 +6087,15 @@ def _find_common_type_compat(self, target) -> DtypeObj: Implementation of find_common_type that adjusts for Index-specific special cases. """ - if is_interval_dtype(self.dtype) and is_valid_na_for_dtype(target, self.dtype): + if is_valid_na_for_dtype(target, self.dtype): # e.g. setting NA value into IntervalArray[int64] - self = cast("IntervalIndex", self) - return IntervalDtype(np.float64, closed=self.closed) + dtype = ensure_dtype_can_hold_na(self.dtype) + if is_dtype_equal(self.dtype, dtype): + raise NotImplementedError( + "This should not be reached. Please report a bug at " + "github.com/pandas-dev/pandas" + ) + return dtype target_dtype, _ = infer_dtype_from(target, pandas_dtype=True) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 8501c7146f371..9d1ee70c265e8 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -24,6 +24,7 @@ array, concat, date_range, + interval_range, period_range, timedelta_range, ) @@ -740,6 +741,17 @@ def test_index_putmask(self, obj, key, expected, val): @pytest.mark.parametrize( "obj,expected,key", [ + pytest.param( + # GH#45568 setting a valid NA value into IntervalDtype[int] should + # cast to IntervalDtype[float] + Series(interval_range(1, 5)), + Series( + [Interval(1, 2), np.nan, Interval(3, 4), Interval(4, 5)], + dtype="interval[float64]", + ), + 1, + id="interval_int_na_value", + ), pytest.param( # these induce dtype changes Series([2, 3, 4, 5, 6, 7, 8, 9, 10]), diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 3a5fcf5c2929c..25cbcf2a84490 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas.core.dtypes.common import is_interval_dtype - import pandas as pd import pandas._testing as tm @@ -203,12 +201,8 @@ def test_convert_dtypes( # Test that it is a copy copy = series.copy(deep=True) - if is_interval_dtype(result.dtype) and result.dtype.subtype.kind in ["i", "u"]: - msg = "Cannot set float NaN to integer-backed IntervalArray" - with pytest.raises(TypeError, match=msg): - result[result.notna()] = np.nan - else: - result[result.notna()] = np.nan + + result[result.notna()] = np.nan # Make sure original not changed tm.assert_series_equal(series, copy)