ENH: Add length attribute to Interval and IntervalIndex (#18805)

jschendel · jreback · commit 507157dbedd9 · 2017-12-23T15:36:28.000-05:00
diff --git a/doc/source/api.rst b/doc/source/api.rst
@@ -1623,6 +1623,7 @@ IntervalIndex Components
    IntervalIndex.right
    IntervalIndex.mid
    IntervalIndex.closed
+   IntervalIndex.length
    IntervalIndex.values
    IntervalIndex.is_non_overlapping_monotonic
 
@@ -1995,6 +1996,7 @@ Properties
     Interval.closed_left
     Interval.closed_right
     Interval.left
+    Interval.length
     Interval.mid
     Interval.open_left
     Interval.open_right
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -141,6 +141,7 @@ Other Enhancements
 - ``IntervalIndex.to_tuples()`` has gained the ``na_tuple`` parameter to control whether NA is returned as a tuple of NA, or NA itself (:issue:`18756`)
 - ``Categorical.rename_categories``, ``CategoricalIndex.rename_categories`` and :attr:`Series.cat.rename_categories`
   can now take a callable as their argument (:issue:`18862`)
+- :class:`Interval` and :class:`IntervalIndex` have gained a ``length`` attribute (:issue:`18789`)
 
 .. _whatsnew_0230.api_breaking:
 
diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx
@@ -54,7 +54,17 @@ cdef class IntervalMixin(object):
             return 0.5 * (self.left + self.right)
         except TypeError:
             # datetime safe version
-            return self.left + 0.5 * (self.right - self.left)
+            return self.left + 0.5 * self.length
+
+    @property
+    def length(self):
+        """Return the length of the Interval, computed as ``right - left``"""
+        try:
+            return self.right - self.left
+        except TypeError:
+            # endpoints that cannot be subtracted (e.g. strings) have no length
+            msg = 'cannot compute length between {left!r} and {right!r}'
+            raise TypeError(msg.format(left=self.left, right=self.right))
 
 
 cdef _interval_like(other):
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -116,18 +116,26 @@ class IntervalIndex(IntervalMixin, Index):
        The indexing behaviors are provisional and may change in
        a future version of pandas.
 
-    Attributes
+    Parameters
     ----------
-    left, right : array-like (1-dimensional)
-        Left and right bounds for each interval.
+    data : array-like (1-dimensional)
+        Array-like containing Interval objects from which to build the
+        IntervalIndex
     closed : {'left', 'right', 'both', 'neither'}, default 'right'
         Whether the intervals are closed on the left-side, right-side, both or
         neither.
     name : object, optional
         Name to be stored in the index.
     copy : boolean, default False
         Copy the meta-data
+
+    Attributes
+    ----------
+    left
+    right
+    closed
     mid
+    length
     values
     is_non_overlapping_monotonic
 
@@ -599,6 +607,20 @@ def closed(self):
         """
         return self._closed
 
+    @property
+    def length(self):
+        """
+        Return an Index holding the length (``right - left``) of each
+        Interval in the IntervalIndex; raises TypeError if undefined
+        """
+        try:
+            return self.right - self.left
+        except TypeError:
+            # endpoints that cannot be subtracted (e.g. strings) have no length
+            msg = ('IntervalIndex contains Intervals without defined length, '
+                   'e.g. Intervals with string endpoints')
+            raise TypeError(msg)
+
     def __len__(self):
         return len(self.left)
 
@@ -683,11 +705,10 @@ def mid(self):
         Return the midpoint of each Interval in the IntervalIndex as an Index
         """
         try:
-            return Index(0.5 * (self.left.values + self.right.values))
+            return 0.5 * (self.left + self.right)
         except TypeError:
             # datetime safe version
-            delta = self.right - self.left
-            return self.left + 0.5 * delta
+            return self.left + 0.5 * self.length
 
     @cache_readonly
     def is_monotonic(self):
diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py
@@ -283,6 +283,36 @@ def test_properties(self, closed):
         tm.assert_numpy_array_equal(np.asarray(index), expected)
         tm.assert_numpy_array_equal(index.values, expected)
 
+    @pytest.mark.parametrize('breaks', [
+        [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
+        [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
+        pd.to_datetime(['20170101', '20170202', '20170303', '20170404']),
+        pd.to_timedelta(['1ns', '2ms', '3s', '4M', '5H', '6D'])])
+    def test_length(self, closed, breaks):
+        # GH 18789: index-level length must match the per-Interval lengths
+        index = IntervalIndex.from_breaks(breaks, closed=closed)
+        result = index.length
+        expected = Index(iv.length for iv in index)
+        tm.assert_index_equal(result, expected)
+
+        # with NA: expected keeps the NA entry itself at that position
+        index = index.insert(1, np.nan)
+        result = index.length
+        expected = Index(iv.length if notna(iv) else iv for iv in index)
+        tm.assert_index_equal(result, expected)
+
+    @pytest.mark.parametrize('breaks', [
+        list('abcdefgh'),
+        lzip(range(10), range(1, 11)),
+        [['A', 'B'], ['a', 'b'], ['c', 'd'], ['e', 'f']],
+        [Interval(0, 1), Interval(1, 2), Interval(3, 4), Interval(4, 5)]])
+    def test_length_errors(self, closed, breaks):
+        # GH 18789
+        index = IntervalIndex.from_breaks(breaks)
+        msg = 'IntervalIndex contains Intervals without defined length'
+        with tm.assert_raises_regex(TypeError, msg):
+            index.length
+
     def test_with_nans(self, closed):
         index = self.create_index(closed=closed)
         assert not index.hasnans
diff --git a/pandas/tests/scalar/test_interval.py b/pandas/tests/scalar/test_interval.py
@@ -1,6 +1,7 @@
 from __future__ import division
 
-from pandas import Interval, Timestamp
+import numpy as np
+from pandas import Interval, Timestamp, Timedelta
 from pandas.core.common import _any_none
 
 import pytest
@@ -66,6 +67,48 @@ def test_hash(self, interval):
         # should not raise
         hash(interval)
 
+    @pytest.mark.parametrize('left, right, expected', [
+        (0, 5, 5),
+        (-2, 5.5, 7.5),
+        (10, 10, 0),
+        (10, np.inf, np.inf),
+        (-np.inf, -5, np.inf),
+        (-np.inf, np.inf, np.inf),
+        (Timedelta('0 days'), Timedelta('5 days'), Timedelta('5 days')),
+        (Timedelta('10 days'), Timedelta('10 days'), Timedelta('0 days')),
+        (Timedelta('1H10M'), Timedelta('5H5M'), Timedelta('3H55M')),
+        (Timedelta('5S'), Timedelta('1H'), Timedelta('59M55S'))])
+    def test_length(self, left, right, expected):
+        # GH 18789: length equals right - left for numeric/Timedelta bounds
+        iv = Interval(left, right)
+        result = iv.length
+        assert result == expected
+
+    @pytest.mark.parametrize('left, right, expected', [
+        ('2017-01-01', '2017-01-06', '5 days'),
+        ('2017-01-01', '2017-01-01 12:00:00', '12 hours'),
+        ('2017-01-01 12:00', '2017-01-01 12:00:00', '0 days'),
+        ('2017-01-01 12:01', '2017-01-05 17:31:00', '4 days 5 hours 30 min')])
+    @pytest.mark.parametrize('tz', (None, 'UTC', 'CET', 'US/Eastern'))
+    def test_length_timestamp(self, tz, left, right, expected):
+        # GH 18789: Timestamp endpoints should yield a Timedelta length
+        iv = Interval(Timestamp(left, tz=tz), Timestamp(right, tz=tz))
+        result = iv.length
+        expected = Timedelta(expected)
+        assert result == expected
+
+    @pytest.mark.parametrize('left, right', [
+        ('a', 'z'),
+        (('a', 'b'), ('c', 'd')),
+        (list('AB'), list('ab')),
+        (Interval(0, 1), Interval(1, 2))])
+    def test_length_errors(self, left, right):
+        # GH 18789: non-subtractable endpoints must raise TypeError
+        iv = Interval(left, right)
+        msg = 'cannot compute length between .* and .*'
+        with tm.assert_raises_regex(TypeError, msg):
+            iv.length
+
     def test_math_add(self, interval):
         expected = Interval(1, 2)
         actual = interval + 1