Skip to content

BUG: Series[Interval[int]][1] = np.nan incorrect coercion/raising #45568

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jan 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ Indexing
^^^^^^^^
- Bug in :meth:`loc.__getitem__` with a list of keys causing an internal inconsistency that could lead to a disconnect between ``frame.at[x, y]`` vs ``frame[y].loc[x]`` (:issue:`22372`)
- Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`)
- Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`)
- Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`)
- Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`)
- Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`)
Expand Down
29 changes: 6 additions & 23 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@

from pandas._config import get_option

from pandas._libs import (
NaT,
lib,
)
from pandas._libs import lib
from pandas._libs.interval import (
VALID_CLOSED,
Interval,
Expand All @@ -44,8 +41,6 @@

from pandas.core.dtypes.common import (
is_categorical_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_dtype_equal,
is_float_dtype,
is_integer_dtype,
Expand All @@ -54,7 +49,6 @@
is_object_dtype,
is_scalar,
is_string_dtype,
is_timedelta64_dtype,
needs_i8_conversion,
pandas_dtype,
)
Expand Down Expand Up @@ -1103,30 +1097,23 @@ def _validate_scalar(self, value):
# TODO: check subdtype match like _validate_setitem_value?
elif is_valid_na_for_dtype(value, self.left.dtype):
# GH#18295
left = right = value
left = right = self.left._na_value
else:
raise TypeError(
"can only insert Interval objects and NA into an IntervalArray"
)
return left, right

def _validate_setitem_value(self, value):
needs_float_conversion = False

if is_valid_na_for_dtype(value, self.left.dtype):
# na value: need special casing to set directly on numpy arrays
value = self.left._na_value
if is_integer_dtype(self.dtype.subtype):
# can't set NaN on a numpy integer array
needs_float_conversion = True
elif is_datetime64_dtype(self.dtype.subtype):
# need proper NaT to set directly on the numpy array
value = np.datetime64("NaT")
elif is_datetime64tz_dtype(self.dtype.subtype):
# need proper NaT to set directly on the DatetimeArray array
value = NaT
elif is_timedelta64_dtype(self.dtype.subtype):
# need proper NaT to set directly on the numpy array
value = np.timedelta64("NaT")
# GH#45484 TypeError, not ValueError, matches what we get with
# non-NA un-holdable value.
raise TypeError("Cannot set float NaN to integer-backed IntervalArray")
value_left, value_right = value, value

elif isinstance(value, Interval):
Expand All @@ -1139,10 +1126,6 @@ def _validate_setitem_value(self, value):
else:
return self._validate_listlike(value)

if needs_float_conversion:
# GH#45484 TypeError, not ValueError, matches what we get with
# non-NA un-holdable value.
raise TypeError("Cannot set float NaN to integer-backed IntervalArray")
return value_left, value_right

def value_counts(self, dropna: bool = True):
Expand Down
13 changes: 11 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,8 +470,13 @@ def ensure_dtype_can_hold_na(dtype: DtypeObj) -> DtypeObj:
If we have a dtype that cannot hold NA values, find the best match that can.
"""
if isinstance(dtype, ExtensionDtype):
# TODO: ExtensionDtype.can_hold_na?
return dtype
if dtype._can_hold_na:
return dtype
elif isinstance(dtype, IntervalDtype):
# TODO(GH#45349): don't special-case IntervalDtype, allow
# overriding instead of returning object below.
return IntervalDtype(np.float64, closed=dtype.closed)
return _dtype_obj
elif dtype.kind == "b":
return _dtype_obj
elif dtype.kind in ["i", "u"]:
Expand Down Expand Up @@ -1470,6 +1475,10 @@ def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:

new_dtype = np.result_type(left, right)

elif is_valid_na_for_dtype(right, left.dtype):
# e.g. IntervalDtype[int] and None/np.nan
new_dtype = ensure_dtype_can_hold_na(left.dtype)

else:
dtype, _ = infer_dtype_from(right, pandas_dtype=True)

Expand Down
12 changes: 12 additions & 0 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1117,6 +1117,18 @@ def __new__(cls, subtype=None, closed: str_type | None = None):
cls._cache_dtypes[key] = u
return u

# Cached flag: can this IntervalDtype hold NA values, judged by its subtype's kind?
# Returns False when the subtype kind is "i" or "u", True for every other kind.
# Raises NotImplementedError when no subtype has been set.
@cache_readonly
def _can_hold_na(self) -> bool:
subtype = self._subtype
if subtype is None:
# partially-initialized
# There is no subtype to inspect, so refuse to answer rather than guess.
raise NotImplementedError(
"_can_hold_na is not defined for partially-initialized IntervalDtype"
)
# Kinds "i" and "u" are reported as unable to hold NA; the cast.py hunk of
# this change (ensure_dtype_can_hold_na) checks this flag and swaps such a
# dtype for a float64-based IntervalDtype.
if subtype.kind in ["i", "u"]:
return False
return True

@property
def closed(self):
return self._closed
Expand Down
13 changes: 9 additions & 4 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
from pandas.core.dtypes.cast import (
can_hold_element,
common_dtype_categorical_compat,
ensure_dtype_can_hold_na,
find_common_type,
infer_dtype_from,
maybe_cast_pointwise_result,
Expand Down Expand Up @@ -178,7 +179,6 @@
from pandas import (
CategoricalIndex,
DataFrame,
IntervalIndex,
MultiIndex,
Series,
)
Expand Down Expand Up @@ -6087,10 +6087,15 @@ def _find_common_type_compat(self, target) -> DtypeObj:
Implementation of find_common_type that adjusts for Index-specific
special cases.
"""
if is_interval_dtype(self.dtype) and is_valid_na_for_dtype(target, self.dtype):
if is_valid_na_for_dtype(target, self.dtype):
# e.g. setting NA value into IntervalArray[int64]
self = cast("IntervalIndex", self)
return IntervalDtype(np.float64, closed=self.closed)
dtype = ensure_dtype_can_hold_na(self.dtype)
if is_dtype_equal(self.dtype, dtype):
raise NotImplementedError(
"This should not be reached. Please report a bug at "
"github.com/pandas-dev/pandas"
)
return dtype

target_dtype, _ = infer_dtype_from(target, pandas_dtype=True)

Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
array,
concat,
date_range,
interval_range,
period_range,
timedelta_range,
)
Expand Down Expand Up @@ -740,6 +741,17 @@ def test_index_putmask(self, obj, key, expected, val):
@pytest.mark.parametrize(
"obj,expected,key",
[
pytest.param(
# GH#45568 setting a valid NA value into IntervalDtype[int] should
# cast to IntervalDtype[float]
Series(interval_range(1, 5)),
Series(
[Interval(1, 2), np.nan, Interval(3, 4), Interval(4, 5)],
dtype="interval[float64]",
),
1,
id="interval_int_na_value",
),
pytest.param(
# these induce dtype changes
Series([2, 3, 4, 5, 6, 7, 8, 9, 10]),
Expand Down
10 changes: 2 additions & 8 deletions pandas/tests/series/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

from pandas.core.dtypes.common import is_interval_dtype

import pandas as pd
import pandas._testing as tm

Expand Down Expand Up @@ -203,12 +201,8 @@ def test_convert_dtypes(

# Test that it is a copy
copy = series.copy(deep=True)
if is_interval_dtype(result.dtype) and result.dtype.subtype.kind in ["i", "u"]:
msg = "Cannot set float NaN to integer-backed IntervalArray"
with pytest.raises(TypeError, match=msg):
result[result.notna()] = np.nan
else:
result[result.notna()] = np.nan

result[result.notna()] = np.nan

# Make sure original not changed
tm.assert_series_equal(series, copy)
Expand Down