diff --git a/doc/source/api.rst b/doc/source/api.rst index 3edaadba64762..64f972e52d190 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1623,6 +1623,7 @@ IntervalIndex Components IntervalIndex.right IntervalIndex.mid IntervalIndex.closed + IntervalIndex.length IntervalIndex.values IntervalIndex.is_non_overlapping_monotonic @@ -1995,6 +1996,7 @@ Properties Interval.closed_left Interval.closed_right Interval.left + Interval.length Interval.mid Interval.open_left Interval.open_right diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 40e1e2011479c..43587ee590fd4 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -141,6 +141,7 @@ Other Enhancements - ``IntervalIndex.to_tuples()`` has gained the ``na_tuple`` parameter to control whether NA is returned as a tuple of NA, or NA itself (:issue:`18756`) - ``Categorical.rename_categories``, ``CategoricalIndex.rename_categories`` and :attr:`Series.cat.rename_categories` can now take a callable as their argument (:issue:`18862`) +- :class:`Interval` and :class:`IntervalIndex` have gained a ``length`` attribute (:issue:`18789`) .. _whatsnew_0230.api_breaking: diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 480ea5cb4fa80..f1da60057186c 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -54,7 +54,17 @@ cdef class IntervalMixin(object): return 0.5 * (self.left + self.right) except TypeError: # datetime safe version - return self.left + 0.5 * (self.right - self.left) + return self.left + 0.5 * self.length + + @property + def length(self): + """Return the length of the Interval""" + try: + return self.right - self.left + except TypeError: + # length not defined for some types, e.g. string + msg = 'cannot compute length between {left!r} and {right!r}' + raise TypeError(msg.format(left=self.left, right=self.right)) cdef _interval_like(other): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index cb786574909db..b7d3305730521 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -116,10 +116,11 @@ class IntervalIndex(IntervalMixin, Index): The indexing behaviors are provisional and may change in a future version of pandas. - Attributes + Parameters ---------- - left, right : array-like (1-dimensional) - Left and right bounds for each interval. + data : array-like (1-dimensional) + Array-like containing Interval objects from which to build the + IntervalIndex closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. @@ -127,7 +128,14 @@ class IntervalIndex(IntervalMixin, Index): Name to be stored in the index. copy : boolean, default False Copy the meta-data + + Attributes + ---------- + left + right + closed mid + length values is_non_overlapping_monotonic @@ -599,6 +607,20 @@ def closed(self): """ return self._closed + @property + def length(self): + """ + Return an Index with entries denoting the length of each Interval in + the IntervalIndex + """ + try: + return self.right - self.left + except TypeError: + # length not defined for some types, e.g. string + msg = ('IntervalIndex contains Intervals without defined length, ' + 'e.g. Intervals with string endpoints') + raise TypeError(msg) + def __len__(self): return len(self.left) @@ -683,11 +705,10 @@ def mid(self): Return the midpoint of each Interval in the IntervalIndex as an Index """ try: - return Index(0.5 * (self.left.values + self.right.values)) + return 0.5 * (self.left + self.right) except TypeError: # datetime safe version - delta = self.right - self.left - return self.left + 0.5 * delta + return self.left + 0.5 * self.length @cache_readonly def is_monotonic(self): diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index c809127a66ab8..74446af8b77f6 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -283,6 +283,36 @@ def test_properties(self, closed): tm.assert_numpy_array_equal(np.asarray(index), expected) tm.assert_numpy_array_equal(index.values, expected) + @pytest.mark.parametrize('breaks', [ + [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608], + [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf], + pd.to_datetime(['20170101', '20170202', '20170303', '20170404']), + pd.to_timedelta(['1ns', '2ms', '3s', '4M', '5H', '6D'])]) + def test_length(self, closed, breaks): + # GH 18789 + index = IntervalIndex.from_breaks(breaks, closed=closed) + result = index.length + expected = Index(iv.length for iv in index) + tm.assert_index_equal(result, expected) + + # with NA + index = index.insert(1, np.nan) + result = index.length + expected = Index(iv.length if notna(iv) else iv for iv in index) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('breaks', [ + list('abcdefgh'), + lzip(range(10), range(1, 11)), + [['A', 'B'], ['a', 'b'], ['c', 'd'], ['e', 'f']], + [Interval(0, 1), Interval(1, 2), Interval(3, 4), Interval(4, 5)]]) + def test_length_errors(self, closed, breaks): + # GH 18789 + index = IntervalIndex.from_breaks(breaks) + msg = 'IntervalIndex contains Intervals without defined length' + with tm.assert_raises_regex(TypeError, msg): + index.length + def test_with_nans(self, closed): index = self.create_index(closed=closed) assert not index.hasnans diff --git a/pandas/tests/scalar/test_interval.py b/pandas/tests/scalar/test_interval.py index 533a79656f174..3db474e32c4dd 100644 --- a/pandas/tests/scalar/test_interval.py +++ b/pandas/tests/scalar/test_interval.py @@ -1,6 +1,7 @@ from __future__ import division -from pandas import Interval, Timestamp +import numpy as np +from pandas import Interval, Timestamp, Timedelta from pandas.core.common import _any_none import pytest @@ -66,6 +67,48 @@ def test_hash(self, interval): # should not raise hash(interval) + @pytest.mark.parametrize('left, right, expected', [ + (0, 5, 5), + (-2, 5.5, 7.5), + (10, 10, 0), + (10, np.inf, np.inf), + (-np.inf, -5, np.inf), + (-np.inf, np.inf, np.inf), + (Timedelta('0 days'), Timedelta('5 days'), Timedelta('5 days')), + (Timedelta('10 days'), Timedelta('10 days'), Timedelta('0 days')), + (Timedelta('1H10M'), Timedelta('5H5M'), Timedelta('3H55M')), + (Timedelta('5S'), Timedelta('1H'), Timedelta('59M55S'))]) + def test_length(self, left, right, expected): + # GH 18789 + iv = Interval(left, right) + result = iv.length + assert result == expected + + @pytest.mark.parametrize('left, right, expected', [ + ('2017-01-01', '2017-01-06', '5 days'), + ('2017-01-01', '2017-01-01 12:00:00', '12 hours'), + ('2017-01-01 12:00', '2017-01-01 12:00:00', '0 days'), + ('2017-01-01 12:01', '2017-01-05 17:31:00', '4 days 5 hours 30 min')]) + @pytest.mark.parametrize('tz', (None, 'UTC', 'CET', 'US/Eastern')) + def test_length_timestamp(self, tz, left, right, expected): + # GH 18789 + iv = Interval(Timestamp(left, tz=tz), Timestamp(right, tz=tz)) + result = iv.length + expected = Timedelta(expected) + assert result == expected + + @pytest.mark.parametrize('left, right', [ + ('a', 'z'), + (('a', 'b'), ('c', 'd')), + (list('AB'), list('ab')), + (Interval(0, 1), Interval(1, 2))]) + def test_length_errors(self, left, right): + # GH 18789 + iv = Interval(left, right) + msg = 'cannot compute length between .* and .*' + with tm.assert_raises_regex(TypeError, msg): + iv.length + def test_math_add(self, interval): expected = Interval(1, 2) actual = interval + 1