Skip to content

BUG: Fix Series(List[Interval]) to infer interval dtype #28399

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,7 @@ Interval

- Bug in :meth:`IntervalIndex.get_indexer` where a :class:`Categorical` or :class:`CategoricalIndex` ``target`` would incorrectly raise a ``TypeError`` (:issue:`30063`)
- Bug in ``pandas.core.dtypes.cast.infer_dtype_from_scalar`` where passing ``pandas_dtype=True`` did not infer :class:`IntervalDtype` (:issue:`30337`)
- Bug in :class:`Series` constructor where constructing a ``Series`` from a ``list`` of :class:`Interval` objects resulted in ``object`` dtype instead of :class:`IntervalDtype` (:issue:`23563`)
- Bug in :class:`IntervalDtype` where the ``kind`` attribute was incorrectly set as ``None`` instead of ``"O"`` (:issue:`30568`)
- Bug in :class:`IntervalIndex`, :class:`~arrays.IntervalArray`, and :class:`Series` with interval data where equality comparisons were incorrect (:issue:`24112`)

Expand Down
9 changes: 2 additions & 7 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,13 +497,8 @@ def sanitize_array(

if is_object_dtype(subarr.dtype) and not is_object_dtype(dtype):
inferred = lib.infer_dtype(subarr, skipna=False)
if inferred == "period":
from pandas.core.arrays import period_array

try:
subarr = period_array(subarr)
except IncompatibleFrequency:
pass
if inferred in {"interval", "period"}:
subarr = array(subarr)

return subarr

Expand Down
47 changes: 32 additions & 15 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays import period_array
from pandas.core.arrays import IntervalArray, period_array


class TestSeriesConstructors:
Expand Down Expand Up @@ -967,16 +967,34 @@ def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg):
expected = Series(pd.Timestamp(arg)).dt.tz_localize("CET")
tm.assert_series_equal(result, expected)

def test_construction_interval(self):
@pytest.mark.parametrize("interval_constructor", [IntervalIndex, IntervalArray])
def test_construction_interval(self, interval_constructor):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no material change here, just cleaning up by using parametrize and removing some unnecessary lines (unassigned repr/str calls)

# construction from interval & array of intervals
index = IntervalIndex.from_breaks(np.arange(3), closed="right")
result = Series(index)
repr(result)
str(result)
tm.assert_index_equal(Index(result.values), index)
intervals = interval_constructor.from_breaks(np.arange(3), closed="right")
result = Series(intervals)
assert result.dtype == "interval[int64]"
tm.assert_index_equal(Index(result.values), Index(intervals))

result = Series(index.values)
tm.assert_index_equal(Index(result.values), index)
# Runs once per ``data_constructor``: the interval scalars are handed to
# Series either as a plain list or wrapped with np.array (the case labelled
# "ndarray[object]" in the ids).
@pytest.mark.parametrize(
"data_constructor", [list, np.array], ids=["list", "ndarray[object]"]
)
def test_constructor_infer_interval(self, data_constructor):
# GH 23563: intervals sharing the same ``closed`` side are inferred as interval dtype
data = [pd.Interval(0, 1), pd.Interval(0, 2), None]
result = pd.Series(data_constructor(data))
# Reference value is built explicitly through IntervalArray so the inferred
# result can be compared against a known interval-backed Series.
expected = pd.Series(IntervalArray(data))
# NOTE(review): the float64 subtype presumably comes from the ``None`` entry
# being stored as a missing value — confirm.
assert result.dtype == "interval[float64]"
tm.assert_series_equal(result, expected)

# Runs once per ``data_constructor``: the interval scalars are handed to
# Series either as a plain list or wrapped with np.array (the case labelled
# "ndarray[object]" in the ids).
@pytest.mark.parametrize(
"data_constructor", [list, np.array], ids=["list", "ndarray[object]"]
)
def test_constructor_interval_mixed_closed(self, data_constructor):
# GH 23563: intervals with differing ``closed`` sides result in object dtype
# (not interval dtype)
data = [pd.Interval(0, 1, closed="both"), pd.Interval(0, 2, closed="neither")]
result = Series(data_constructor(data))
assert result.dtype == object
# The stored elements must round-trip as the original Interval objects.
assert result.tolist() == data

def test_construction_consistency(self):

Expand All @@ -993,17 +1011,16 @@ def test_construction_consistency(self):
result = Series(s.values, dtype=s.dtype)
tm.assert_series_equal(result, s)

def test_constructor_infer_period(self):
@pytest.mark.parametrize(
"data_constructor", [list, np.array], ids=["list", "ndarray[object]"]
)
def test_constructor_infer_period(self, data_constructor):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no material change here, just cleaning up by using parametrize

data = [pd.Period("2000", "D"), pd.Period("2001", "D"), None]
result = pd.Series(data)
result = pd.Series(data_constructor(data))
expected = pd.Series(period_array(data))
tm.assert_series_equal(result, expected)
assert result.dtype == "Period[D]"

data = np.asarray(data, dtype=object)
tm.assert_series_equal(result, expected)
assert result.dtype == "Period[D]"

def test_constructor_period_incompatible_frequency(self):
data = [pd.Period("2000", "D"), pd.Period("2001", "A")]
result = pd.Series(data)
Expand Down