From 2f06ea2c4ab6a7f73d484e16cd156042596c4d8f Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 1 Feb 2018 19:00:16 -0700 Subject: [PATCH] ENH: interval accessor --- pandas/core/dtypes/common.py | 35 ++++++- pandas/core/indexes/accessors.py | 105 +++++++++++++++++++- pandas/core/indexes/interval.py | 4 + pandas/core/series.py | 6 +- pandas/tests/series/test_interval_values.py | 52 ++++++++++ 5 files changed, 198 insertions(+), 4 deletions(-) create mode 100644 pandas/tests/series/test_interval_values.py diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index dca9a5fde0d74..dec29d34b61ac 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -13,7 +13,7 @@ from .generic import (ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex, - ABCIndexClass, ABCDateOffset) + ABCIndexClass, ABCDateOffset, ABCIntervalIndex) from .inference import is_string_like, is_list_like from .inference import * # noqa @@ -510,6 +510,39 @@ def is_interval_dtype(arr_or_dtype): return IntervalDtype.is_dtype(arr_or_dtype) +def is_interval_arraylike(arr): + """ + Check whether an array-like is interval array-like or IntervalIndex. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is an interval + array-like or IntervalIndex instance. 
+ + Examples + -------- + >>> is_interval_arraylike([1, 2, 3]) + False + >>> is_interval_arraylike(pd.Index([1, 2, 3])) + False + >>> is_interval_arraylike(pd.IntervalIndex.from_breaks([0, 1, 2, 3])) + True + >>> is_interval_arraylike(pd.Series(pd.interval_range(0, 5))) + True + """ + + if isinstance(arr, ABCIntervalIndex): + return True + elif isinstance(arr, (np.ndarray, ABCSeries)): + return arr.dtype == object and lib.infer_dtype(arr) == 'interval' + return getattr(arr, 'inferred_type', None) == 'interval' + + def is_categorical_dtype(arr_or_dtype): """ Check whether an array-like or dtype is of the Categorical dtype. diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index c5b300848876e..e6b2013dcecf9 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -6,7 +6,7 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.common import ( - is_period_arraylike, + is_period_arraylike, is_interval_arraylike, is_datetime_arraylike, is_integer_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, is_categorical_dtype, @@ -15,6 +15,7 @@ from pandas.core.accessor import PandasDelegate from pandas.core.base import NoNewAttributesMixin, PandasObject from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.interval import IntervalIndex from pandas._libs.tslibs.period import IncompatibleFrequency # noqa from pandas.core.indexes.period import PeriodIndex from pandas.core.indexes.timedeltas import TimedeltaIndex @@ -248,3 +249,105 @@ def __new__(cls, data): raise AttributeError("Can only use .dt accessor with datetimelike " "values") + + +class IntervalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): + """ + Accessor object for interval properties of the Series values. 
+ + Parameters + ---------- + data : Series + + Examples + -------- + >>> s.iv.left + >>> s.iv.right + >>> s.iv.mid + >>> s.iv.length + """ + + def __init__(self, data): + from pandas import Series + + if not isinstance(data, ABCSeries): + msg = "cannot convert an object of type {typ} to an IntervalIndex" + raise TypeError(msg.format(typ=type(data))) + + # compat with Categorical[Interval] + orig = data if is_categorical_dtype(data) else None + if orig is not None: + data = Series(orig.values.categories, name=orig.name, copy=False) + + self._validate(data) + self.data = data + self.orig = orig + self.name = getattr(data, 'name', None) + self.index = getattr(data, 'index', None) + self._freeze() + + @staticmethod + def _validate(data): + if not is_interval_arraylike(data): + msg = "Can only use .iv accessor with 'interval' dtype" + raise AttributeError(msg) + + def _get_values(self): + return IntervalIndex(self.data, copy=False, name=self.name) + + def _delegate_property_get(self, name): + from pandas import Series + values = self._get_values() + result = getattr(values, name) + result = np.asarray(result) + + # blow up if we operate on categories + if self.orig is not None: + result = take_1d(result, self.orig.cat.codes) + index = self.orig.index + else: + index = self.index + + # return the result as a Series, which is by definition a copy + result = Series(result, index=index, name=self.name) + + # setting this object will show a SettingWithCopyWarning/Error + result._is_copy = ("modifications to a property of an IntervalIndex " + "object are not supported and are discarded. " + "Change values on the original.") + + return result + + def _delegate_property_set(self, name, value, *args, **kwargs): + raise ValueError("modifications to a property of an IntervalIndex " + "object are not supported. 
Change values on the " + "original.") + + def _delegate_method(self, name, *args, **kwargs): + from pandas import Series + values = self._get_values() + + method = getattr(values, name) + result = method(*args, **kwargs) + + if not is_list_like(result): + return result + + result = Series(result, index=self.index, name=self.name) + + # setting this object will show a SettingWithCopyWarning/Error + result._is_copy = ("modifications to a method of an IntervalIndex " + "object are not supported and are discarded. " + "Change values on the original.") + + return result + + +IntervalAccessor._add_delegate_accessors( + delegate=IntervalIndex, + accessors=IntervalIndex._interval_ops, + typ='property') +IntervalAccessor._add_delegate_accessors( + delegate=IntervalIndex, + accessors=IntervalIndex._interval_methods, + typ='method') diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 3bf783b5a2faa..99b1b3640dc75 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -208,6 +208,10 @@ class IntervalIndex(IntervalMixin, Index): _comparables = ['name'] _attributes = ['name', 'closed'] + # define my properties & methods for delegation + _interval_ops = ['left', 'right', 'mid', 'length'] + _interval_methods = [] + # we would like our indexing holder to defer to us _defer_to_indexing = True diff --git a/pandas/core/series.py b/pandas/core/series.py index 78b4c3a70a519..cbf480360d5fc 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -45,7 +45,8 @@ from pandas.core import generic, base from pandas.core.internals import SingleBlockManager from pandas.core.arrays.categorical import Categorical, CategoricalAccessor -from pandas.core.indexes.accessors import CombinedDatetimelikeProperties +from pandas.core.indexes.accessors import ( + CombinedDatetimelikeProperties, IntervalAccessor) from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex from 
pandas.core.indexes.period import PeriodIndex @@ -140,7 +141,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): Copy input data """ _metadata = ['name'] - _accessors = frozenset(['dt', 'cat', 'str']) + _accessors = frozenset(['dt', 'cat', 'str', 'iv']) _deprecations = generic.NDFrame._deprecations | frozenset( ['asobject', 'sortlevel', 'reshape', 'get_value', 'set_value', 'from_csv', 'valid']) @@ -3085,6 +3086,7 @@ def to_period(self, freq=None, copy=True): dt = CachedAccessor("dt", CombinedDatetimelikeProperties) cat = CachedAccessor("cat", CategoricalAccessor) plot = CachedAccessor("plot", gfx.SeriesPlotMethods) + iv = CachedAccessor("iv", IntervalAccessor) # ---------------------------------------------------------------------- # Add plotting methods to Series diff --git a/pandas/tests/series/test_interval_values.py b/pandas/tests/series/test_interval_values.py new file mode 100644 index 0000000000000..293cc65e77cb2 --- /dev/null +++ b/pandas/tests/series/test_interval_values.py @@ -0,0 +1,52 @@ +import pytest +from pandas import ( + Categorical, + IntervalIndex, + interval_range, + Series) +from pandas.core.dtypes.common import is_categorical_dtype +from pandas.core.indexes.accessors import IntervalAccessor +import pandas.util.testing as tm + + +class TestSeriesIntervalAccessor(object): + + @pytest.mark.parametrize('prop', IntervalIndex._interval_ops) + @pytest.mark.parametrize('data', [ + IntervalIndex.from_breaks([0, 1, 3, 6, 10]), + Categorical(interval_range(0, 3).repeat(2))]) + def test_iv_properties(self, prop, data): + s = Series(data) + if is_categorical_dtype(data): + ii = IntervalIndex(data.get_values()) + else: + ii = data + + # check values + result = getattr(s.iv, prop) + expected = Series(getattr(ii, prop)) + tm.assert_series_equal(result, expected) + + # no modifications + msg = ('modifications to a property of an IntervalIndex object are ' + 'not supported. 
Change values on the original.') + with tm.assert_raises_regex(ValueError, msg): + setattr(s.iv, prop, 1) + + def test_iv_accessor_api(self): + assert Series.iv is IntervalAccessor + + s = Series(interval_range(0, 5)) + assert isinstance(s.iv, IntervalAccessor) + + invalid = Series(list('abcde')) + assert not hasattr(invalid, 'iv') + + with tm.assert_raises_regex(AttributeError, "only use .iv accessor"): + invalid.iv + + def test_no_new_attributes(self): + s = Series(interval_range(0, 5)) + msg = 'You cannot add any new attribute' + with tm.assert_raises_regex(AttributeError, msg): + s.iv.new_attribute = 'foo'