Skip to content

Commit eae37b0

Browse files
authored
BUG: Series[Interval[int]][1] = np.nan incorrect coercion/raising (#45568)
1 parent 0f5d934 commit eae37b0

File tree

7 files changed

+53
-37
lines changed

7 files changed

+53
-37
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,7 @@ Indexing
260260
^^^^^^^^
261261
- Bug in :meth:`loc.__getitem__` with a list of keys causing an internal inconsistency that could lead to a disconnect between ``frame.at[x, y]`` and ``frame[y].loc[x]`` (:issue:`22372`)
262262
- Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`)
263+
- Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`)
263264
- Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`)
264265
- Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`)
265266
- Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`)

pandas/core/arrays/interval.py

+6-23
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,7 @@
1818

1919
from pandas._config import get_option
2020

21-
from pandas._libs import (
22-
NaT,
23-
lib,
24-
)
21+
from pandas._libs import lib
2522
from pandas._libs.interval import (
2623
VALID_CLOSED,
2724
Interval,
@@ -44,8 +41,6 @@
4441

4542
from pandas.core.dtypes.common import (
4643
is_categorical_dtype,
47-
is_datetime64_dtype,
48-
is_datetime64tz_dtype,
4944
is_dtype_equal,
5045
is_float_dtype,
5146
is_integer_dtype,
@@ -54,7 +49,6 @@
5449
is_object_dtype,
5550
is_scalar,
5651
is_string_dtype,
57-
is_timedelta64_dtype,
5852
needs_i8_conversion,
5953
pandas_dtype,
6054
)
@@ -1103,30 +1097,23 @@ def _validate_scalar(self, value):
11031097
# TODO: check subdtype match like _validate_setitem_value?
11041098
elif is_valid_na_for_dtype(value, self.left.dtype):
11051099
# GH#18295
1106-
left = right = value
1100+
left = right = self.left._na_value
11071101
else:
11081102
raise TypeError(
11091103
"can only insert Interval objects and NA into an IntervalArray"
11101104
)
11111105
return left, right
11121106

11131107
def _validate_setitem_value(self, value):
1114-
needs_float_conversion = False
11151108

11161109
if is_valid_na_for_dtype(value, self.left.dtype):
11171110
# na value: need special casing to set directly on numpy arrays
1111+
value = self.left._na_value
11181112
if is_integer_dtype(self.dtype.subtype):
11191113
# can't set NaN on a numpy integer array
1120-
needs_float_conversion = True
1121-
elif is_datetime64_dtype(self.dtype.subtype):
1122-
# need proper NaT to set directly on the numpy array
1123-
value = np.datetime64("NaT")
1124-
elif is_datetime64tz_dtype(self.dtype.subtype):
1125-
# need proper NaT to set directly on the DatetimeArray array
1126-
value = NaT
1127-
elif is_timedelta64_dtype(self.dtype.subtype):
1128-
# need proper NaT to set directly on the numpy array
1129-
value = np.timedelta64("NaT")
1114+
# GH#45484 TypeError, not ValueError, matches what we get with
1115+
# non-NA un-holdable value.
1116+
raise TypeError("Cannot set float NaN to integer-backed IntervalArray")
11301117
value_left, value_right = value, value
11311118

11321119
elif isinstance(value, Interval):
@@ -1139,10 +1126,6 @@ def _validate_setitem_value(self, value):
11391126
else:
11401127
return self._validate_listlike(value)
11411128

1142-
if needs_float_conversion:
1143-
# GH#45484 TypeError, not ValueError, matches what we get with
1144-
# non-NA un-holdable value.
1145-
raise TypeError("Cannot set float NaN to integer-backed IntervalArray")
11461129
return value_left, value_right
11471130

11481131
def value_counts(self, dropna: bool = True):

pandas/core/dtypes/cast.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -470,8 +470,13 @@ def ensure_dtype_can_hold_na(dtype: DtypeObj) -> DtypeObj:
470470
If we have a dtype that cannot hold NA values, find the best match that can.
471471
"""
472472
if isinstance(dtype, ExtensionDtype):
473-
# TODO: ExtensionDtype.can_hold_na?
474-
return dtype
473+
if dtype._can_hold_na:
474+
return dtype
475+
elif isinstance(dtype, IntervalDtype):
476+
# TODO(GH#45349): don't special-case IntervalDtype, allow
477+
# overriding instead of returning object below.
478+
return IntervalDtype(np.float64, closed=dtype.closed)
479+
return _dtype_obj
475480
elif dtype.kind == "b":
476481
return _dtype_obj
477482
elif dtype.kind in ["i", "u"]:
@@ -1470,6 +1475,10 @@ def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:
14701475

14711476
new_dtype = np.result_type(left, right)
14721477

1478+
elif is_valid_na_for_dtype(right, left.dtype):
1479+
# e.g. IntervalDtype[int] and None/np.nan
1480+
new_dtype = ensure_dtype_can_hold_na(left.dtype)
1481+
14731482
else:
14741483
dtype, _ = infer_dtype_from(right, pandas_dtype=True)
14751484

pandas/core/dtypes/dtypes.py

+12
Original file line numberDiff line numberDiff line change
@@ -1117,6 +1117,18 @@ def __new__(cls, subtype=None, closed: str_type | None = None):
11171117
cls._cache_dtypes[key] = u
11181118
return u
11191119

1120+
@cache_readonly
1121+
def _can_hold_na(self) -> bool:
1122+
subtype = self._subtype
1123+
if subtype is None:
1124+
# partially-initialized
1125+
raise NotImplementedError(
1126+
"_can_hold_na is not defined for partially-initialized IntervalDtype"
1127+
)
1128+
if subtype.kind in ["i", "u"]:
1129+
return False
1130+
return True
1131+
11201132
@property
11211133
def closed(self):
11221134
return self._closed

pandas/core/indexes/base.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
from pandas.core.dtypes.cast import (
7171
can_hold_element,
7272
common_dtype_categorical_compat,
73+
ensure_dtype_can_hold_na,
7374
find_common_type,
7475
infer_dtype_from,
7576
maybe_cast_pointwise_result,
@@ -177,7 +178,6 @@
177178
from pandas import (
178179
CategoricalIndex,
179180
DataFrame,
180-
IntervalIndex,
181181
MultiIndex,
182182
Series,
183183
)
@@ -6097,10 +6097,15 @@ def _find_common_type_compat(self, target) -> DtypeObj:
60976097
Implementation of find_common_type that adjusts for Index-specific
60986098
special cases.
60996099
"""
6100-
if is_interval_dtype(self.dtype) and is_valid_na_for_dtype(target, self.dtype):
6100+
if is_valid_na_for_dtype(target, self.dtype):
61016101
# e.g. setting NA value into IntervalArray[int64]
6102-
self = cast("IntervalIndex", self)
6103-
return IntervalDtype(np.float64, closed=self.closed)
6102+
dtype = ensure_dtype_can_hold_na(self.dtype)
6103+
if is_dtype_equal(self.dtype, dtype):
6104+
raise NotImplementedError(
6105+
"This should not be reached. Please report a bug at "
6106+
"github.com/pandas-dev/pandas"
6107+
)
6108+
return dtype
61046109

61056110
target_dtype, _ = infer_dtype_from(target, pandas_dtype=True)
61066111

pandas/tests/series/indexing/test_setitem.py

+12
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
array,
2525
concat,
2626
date_range,
27+
interval_range,
2728
period_range,
2829
timedelta_range,
2930
)
@@ -740,6 +741,17 @@ def test_index_putmask(self, obj, key, expected, val):
740741
@pytest.mark.parametrize(
741742
"obj,expected,key",
742743
[
744+
pytest.param(
745+
# GH#45568 setting a valid NA value into IntervalDtype[int] should
746+
# cast to IntervalDtype[float]
747+
Series(interval_range(1, 5)),
748+
Series(
749+
[Interval(1, 2), np.nan, Interval(3, 4), Interval(4, 5)],
750+
dtype="interval[float64]",
751+
),
752+
1,
753+
id="interval_int_na_value",
754+
),
743755
pytest.param(
744756
# these induce dtype changes
745757
Series([2, 3, 4, 5, 6, 7, 8, 9, 10]),

pandas/tests/series/methods/test_convert_dtypes.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas.core.dtypes.common import is_interval_dtype
7-
86
import pandas as pd
97
import pandas._testing as tm
108

@@ -203,12 +201,8 @@ def test_convert_dtypes(
203201

204202
# Test that it is a copy
205203
copy = series.copy(deep=True)
206-
if is_interval_dtype(result.dtype) and result.dtype.subtype.kind in ["i", "u"]:
207-
msg = "Cannot set float NaN to integer-backed IntervalArray"
208-
with pytest.raises(TypeError, match=msg):
209-
result[result.notna()] = np.nan
210-
else:
211-
result[result.notna()] = np.nan
204+
205+
result[result.notna()] = np.nan
212206

213207
# Make sure original not changed
214208
tm.assert_series_equal(series, copy)

0 commit comments

Comments
 (0)