Skip to content

[WIP] ENH: interval accessor #19502

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from .generic import (ABCCategorical, ABCPeriodIndex,
ABCDatetimeIndex, ABCSeries,
ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex,
ABCIndexClass, ABCDateOffset)
ABCIndexClass, ABCDateOffset, ABCIntervalIndex)
from .inference import is_string_like, is_list_like
from .inference import * # noqa

Expand Down Expand Up @@ -510,6 +510,39 @@ def is_interval_dtype(arr_or_dtype):
return IntervalDtype.is_dtype(arr_or_dtype)


def is_interval_arraylike(arr):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may not be needed in the long-term; once #19453 is closed is_interval_dtype should be sufficient anywhere I've used this, so haven't added related tests. Can add tests if this PR looks like it will get closed first. Or is there a reason we'd want to keep this after #19453, in which case tests should be added regardless?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, as part of adding first-class dtypes we would want to remove this, but it's no problem to add it here for now. You can add a couple of basic tests for this, though.

"""
Check whether an array-like is interval array-like or IntervalIndex.

Parameters
----------
arr : array-like
The array-like to check.

Returns
-------
boolean : Whether or not the array-like is an interval
array-like or IntervalIndex instance.

Examples
--------
>>> is_interval_arraylike([1, 2, 3])
False
>>> is_interval_arraylike(pd.Index([1, 2, 3]))
False
>>> is_interval_arraylike(pd.IntervalIndex.from_breaks([0, 1, 2, 3]))
True
>>> is_interval_arraylike(pd.Series(pd.interval_range(0, 5)))
True
"""

if isinstance(arr, ABCIntervalIndex):
return True
elif isinstance(arr, (np.ndarray, ABCSeries)):
return arr.dtype == object and lib.infer_dtype(arr) == 'interval'
return getattr(arr, 'inferred_type', None) == 'interval'


def is_categorical_dtype(arr_or_dtype):
"""
Check whether an array-like or dtype is of the Categorical dtype.
Expand Down
105 changes: 104 additions & 1 deletion pandas/core/indexes/accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.common import (
is_period_arraylike,
is_period_arraylike, is_interval_arraylike,
is_datetime_arraylike, is_integer_dtype,
is_datetime64_dtype, is_datetime64tz_dtype,
is_timedelta64_dtype, is_categorical_dtype,
Expand All @@ -15,6 +15,7 @@
from pandas.core.accessor import PandasDelegate
from pandas.core.base import NoNewAttributesMixin, PandasObject
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.interval import IntervalIndex
from pandas._libs.tslibs.period import IncompatibleFrequency # noqa
from pandas.core.indexes.period import PeriodIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex
Expand Down Expand Up @@ -248,3 +249,105 @@ def __new__(cls, data):

raise AttributeError("Can only use .dt accessor with datetimelike "
"values")


class IntervalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
    """
    Accessor object for interval properties of the Series values.

    Property and method access is delegated to an IntervalIndex built from
    the Series values; the delegated names are the ones registered from
    ``IntervalIndex._interval_ops`` and ``IntervalIndex._interval_methods``.
    List-like results are returned as a new Series aligned with the
    original Series.

    Parameters
    ----------
    data : Series
        Series holding interval values, or a categorical Series whose
        categories are intervals.

    Raises
    ------
    TypeError
        If ``data`` is not a Series.
    AttributeError
        If the values (or the categories, for categorical data) are not
        interval-like.

    Examples
    --------
    >>> s.iv.left
    >>> s.iv.right
    >>> s.iv.mid
    >>> s.iv.length
    """

    def __init__(self, data):
        from pandas import Series

        if not isinstance(data, ABCSeries):
            msg = "cannot convert an object of type {typ} to an IntervalIndex"
            raise TypeError(msg.format(typ=type(data)))

        # compat with Categorical[Interval]: operate on the categories and
        # keep the original Series around so results can be mapped back
        # through its codes
        orig = data if is_categorical_dtype(data) else None
        if orig is not None:
            data = Series(orig.values.categories, name=orig.name, copy=False)

        self._validate(data)
        self.data = data
        self.orig = orig
        self.name = getattr(data, 'name', None)
        self.index = getattr(data, 'index', None)
        # no new attributes may be set on the accessor from here on
        self._freeze()

    @staticmethod
    def _validate(data):
        """Raise AttributeError unless ``data`` is interval array-like."""
        if not is_interval_arraylike(data):
            msg = "Can only use .iv accessor with 'interval' dtype"
            raise AttributeError(msg)

    def _get_values(self):
        """Return the IntervalIndex that delegated calls are made on."""
        return IntervalIndex(self.data, copy=False, name=self.name)

    def _delegate_property_get(self, name):
        """Return property ``name`` of the interval values as a Series."""
        from pandas import Series
        values = self._get_values()
        result = getattr(values, name)
        result = np.asarray(result)

        # blow up if we operate on categories
        if self.orig is not None:
            result = take_1d(result, self.orig.cat.codes)
            index = self.orig.index
        else:
            index = self.index

        # return the result as a Series, which is by definition a copy
        result = Series(result, index=index, name=self.name)

        # setting this object will show a SettingWithCopyWarning/Error
        result._is_copy = ("modifications to a property of an IntervalIndex "
                           "object are not supported and are discarded. "
                           "Change values on the original.")

        return result

    def _delegate_property_set(self, name, value, *args, **kwargs):
        """Always raise ValueError: delegated properties are read-only."""
        raise ValueError("modifications to a property of an IntervalIndex "
                         "object are not supported. Change values on the "
                         "original.")

    def _delegate_method(self, name, *args, **kwargs):
        """
        Call method ``name`` on the interval values.

        Non list-like results are returned unchanged; list-like results are
        wrapped in a Series.
        """
        from pandas import Series
        values = self._get_values()

        method = getattr(values, name)
        result = method(*args, **kwargs)

        if not is_list_like(result):
            return result

        # wrap against the index of the values the method actually ran on
        result = Series(result, index=self.index, name=self.name)

        # blow up if we operate on categories, consistent with
        # _delegate_property_get: the method ran on the categories, so map
        # the result back onto the original Series through its codes
        if self.orig is not None:
            result = Series(take_1d(result.values, self.orig.cat.codes),
                            index=self.orig.index, name=self.name)

        # setting this object will show a SettingWithCopyWarning/Error
        result._is_copy = ("modifications to a method of an IntervalIndex "
                           "object are not supported and are discarded. "
                           "Change values on the original.")

        return result


# Expose the IntervalIndex properties on the accessor first ...
IntervalAccessor._add_delegate_accessors(
    typ='property',
    accessors=IntervalIndex._interval_ops,
    delegate=IntervalIndex)
# ... and then the IntervalIndex methods.
IntervalAccessor._add_delegate_accessors(
    typ='method',
    accessors=IntervalIndex._interval_methods,
    delegate=IntervalIndex)
4 changes: 4 additions & 0 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,10 @@ class IntervalIndex(IntervalMixin, Index):
_comparables = ['name']
_attributes = ['name', 'closed']

# define my properties & methods for delegation
_interval_ops = ['left', 'right', 'mid', 'length']
_interval_methods = []

# we would like our indexing holder to defer to us
_defer_to_indexing = True

Expand Down
6 changes: 4 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@
from pandas.core import generic, base
from pandas.core.internals import SingleBlockManager
from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
from pandas.core.indexes.accessors import (
CombinedDatetimelikeProperties, IntervalAccessor)
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex
from pandas.core.indexes.period import PeriodIndex
Expand Down Expand Up @@ -140,7 +141,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
Copy input data
"""
_metadata = ['name']
_accessors = frozenset(['dt', 'cat', 'str'])
_accessors = frozenset(['dt', 'cat', 'str', 'iv'])
_deprecations = generic.NDFrame._deprecations | frozenset(
['asobject', 'sortlevel', 'reshape', 'get_value', 'set_value',
'from_csv', 'valid'])
Expand Down Expand Up @@ -3085,6 +3086,7 @@ def to_period(self, freq=None, copy=True):
dt = CachedAccessor("dt", CombinedDatetimelikeProperties)
cat = CachedAccessor("cat", CategoricalAccessor)
plot = CachedAccessor("plot", gfx.SeriesPlotMethods)
iv = CachedAccessor("iv", IntervalAccessor)

# ----------------------------------------------------------------------
# Add plotting methods to Series
Expand Down
52 changes: 52 additions & 0 deletions pandas/tests/series/test_interval_values.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import pytest
from pandas import (
Categorical,
IntervalIndex,
interval_range,
Series)
from pandas.core.dtypes.common import is_categorical_dtype
from pandas.core.indexes.accessors import IntervalAccessor
import pandas.util.testing as tm


class TestSeriesIntervalAccessor(object):
    """Tests for the ``iv`` accessor on Series."""

    @pytest.mark.parametrize('prop', IntervalIndex._interval_ops)
    @pytest.mark.parametrize('data', [
        IntervalIndex.from_breaks([0, 1, 3, 6, 10]),
        Categorical(interval_range(0, 3).repeat(2))])
    def test_iv_properties(self, prop, data):
        # a Categorical is first turned into an IntervalIndex so the
        # reference values can be read from it
        if is_categorical_dtype(data):
            reference = IntervalIndex(data.get_values())
        else:
            reference = data
        ser = Series(data)

        # the accessor returns the same values as the IntervalIndex property
        tm.assert_series_equal(getattr(ser.iv, prop),
                               Series(getattr(reference, prop)))

        # assigning through the accessor is rejected
        expected_msg = ('modifications to a property of an IntervalIndex '
                        'object are '
                        'not supported. Change values on the original.')
        with tm.assert_raises_regex(ValueError, expected_msg):
            setattr(ser.iv, prop, 1)

    def test_iv_accessor_api(self):
        # accessed on the class, ``iv`` is the accessor class itself
        assert Series.iv is IntervalAccessor

        valid = Series(interval_range(0, 5))
        assert isinstance(valid.iv, IntervalAccessor)

        # non-interval data must not expose the accessor
        non_interval = Series(list('abcde'))
        assert not hasattr(non_interval, 'iv')

        with tm.assert_raises_regex(AttributeError, "only use .iv accessor"):
            non_interval.iv

    def test_no_new_attributes(self):
        ser = Series(interval_range(0, 5))
        expected_msg = 'You cannot add any new attribute'
        with tm.assert_raises_regex(AttributeError, expected_msg):
            ser.iv.new_attribute = 'foo'