Skip to content

ENH: Add length attribute to Interval and IntervalIndex #18805

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Dec 23, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1623,6 +1623,7 @@ IntervalIndex Components
IntervalIndex.right
IntervalIndex.mid
IntervalIndex.closed
IntervalIndex.length
IntervalIndex.values
IntervalIndex.is_non_overlapping_monotonic

Expand Down Expand Up @@ -1995,6 +1996,7 @@ Properties
Interval.closed_left
Interval.closed_right
Interval.left
Interval.length
Interval.mid
Interval.open_left
Interval.open_right
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ Other Enhancements
- ``IntervalIndex.to_tuples()`` has gained the ``na_tuple`` parameter to control whether NA is returned as a tuple of NA, or NA itself (:issue:`18756`)
- ``Categorical.rename_categories``, ``CategoricalIndex.rename_categories`` and :attr:`Series.cat.rename_categories`
can now take a callable as their argument (:issue:`18862`)
- :class:`Interval` and :class:`IntervalIndex` have gained a ``length`` attribute (:issue:`18789`)

.. _whatsnew_0230.api_breaking:

Expand Down
12 changes: 11 additions & 1 deletion pandas/_libs/interval.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,17 @@ cdef class IntervalMixin(object):
return 0.5 * (self.left + self.right)
except TypeError:
# datetime safe version
return self.left + 0.5 * (self.right - self.left)
return self.left + 0.5 * self.length

@property
def length(self):
    """
    Return the length of the Interval, computed as ``right - left``.

    Raises
    ------
    TypeError
        If the endpoints do not support subtraction, e.g. strings.
    """
    try:
        span = self.right - self.left
    except TypeError:
        # subtraction is undefined for some endpoint types, e.g. string
        template = 'cannot compute length between {left!r} and {right!r}'
        raise TypeError(template.format(left=self.left, right=self.right))
    else:
        return span


cdef _interval_like(other):
Expand Down
33 changes: 27 additions & 6 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,18 +116,26 @@ class IntervalIndex(IntervalMixin, Index):
The indexing behaviors are provisional and may change in
a future version of pandas.

Attributes
Parameters
----------
left, right : array-like (1-dimensional)
Left and right bounds for each interval.
data : array-like (1-dimensional)
Array-like containing Interval objects from which to build the
IntervalIndex
closed : {'left', 'right', 'both', 'neither'}, default 'right'
Whether the intervals are closed on the left-side, right-side, both or
neither.
name : object, optional
Name to be stored in the index.
copy : boolean, default False
Copy the meta-data

Attributes
----------
left
right
closed
mid
length
values
is_non_overlapping_monotonic

Expand Down Expand Up @@ -599,6 +607,20 @@ def closed(self):
"""
return self._closed

@property
def length(self):
    """
    Return an Index with entries denoting the length of each Interval in
    the IntervalIndex, computed as ``right - left``.

    Raises
    ------
    TypeError
        If the endpoints do not support subtraction, e.g. strings.
    """
    try:
        lengths = self.right - self.left
    except TypeError:
        # length not defined for some types, e.g. string
        raise TypeError('IntervalIndex contains Intervals without '
                        'defined length, e.g. Intervals with string '
                        'endpoints')
    else:
        return lengths

def __len__(self):
    """Return the number of entries, taken from the left-bounds index."""
    left_bounds = self.left
    return len(left_bounds)

Expand Down Expand Up @@ -683,11 +705,10 @@ def mid(self):
Return the midpoint of each Interval in the IntervalIndex as an Index
"""
try:
return Index(0.5 * (self.left.values + self.right.values))
return 0.5 * (self.left + self.right)
except TypeError:
# datetime safe version
delta = self.right - self.left
return self.left + 0.5 * delta
return self.left + 0.5 * self.length

@cache_readonly
def is_monotonic(self):
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/indexes/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,36 @@ def test_properties(self, closed):
tm.assert_numpy_array_equal(np.asarray(index), expected)
tm.assert_numpy_array_equal(index.values, expected)

@pytest.mark.parametrize('breaks', [
    [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
    [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
    pd.to_datetime(['20170101', '20170202', '20170303', '20170404']),
    pd.to_timedelta(['1ns', '2ms', '3s', '4M', '5H', '6D'])])
def test_length(self, closed, breaks):
    """
    Check that IntervalIndex.length matches the per-Interval lengths,
    both without and with a missing entry.
    """
    # GH 18789
    idx = IntervalIndex.from_breaks(breaks, closed=closed)
    lengths = idx.length
    expected = Index([interval.length for interval in idx])
    tm.assert_index_equal(lengths, expected)

    # with NA: the missing entry is expected to pass through as-is
    idx_na = idx.insert(1, np.nan)
    lengths_na = idx_na.length
    expected_na = Index([interval.length if notna(interval) else interval
                         for interval in idx_na])
    tm.assert_index_equal(lengths_na, expected_na)

@pytest.mark.parametrize('breaks', [
    list('abcdefgh'),
    lzip(range(10), range(1, 11)),
    [['A', 'B'], ['a', 'b'], ['c', 'd'], ['e', 'f']],
    [Interval(0, 1), Interval(1, 2), Interval(3, 4), Interval(4, 5)]])
def test_length_errors(self, closed, breaks):
    """
    Check that IntervalIndex.length raises TypeError when the endpoints
    do not support subtraction (strings, tuples, lists, Intervals).
    """
    # GH 18789
    # Pass the ``closed`` fixture through; otherwise every ``closed``
    # parametrization would build the same default-closed index.
    index = IntervalIndex.from_breaks(breaks, closed=closed)
    msg = 'IntervalIndex contains Intervals without defined length'
    with tm.assert_raises_regex(TypeError, msg):
        index.length

def test_with_nans(self, closed):
index = self.create_index(closed=closed)
assert not index.hasnans
Expand Down
45 changes: 44 additions & 1 deletion pandas/tests/scalar/test_interval.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import division

from pandas import Interval, Timestamp
import numpy as np
from pandas import Interval, Timestamp, Timedelta
from pandas.core.common import _any_none

import pytest
Expand Down Expand Up @@ -66,6 +67,48 @@ def test_hash(self, interval):
# should not raise
hash(interval)

@pytest.mark.parametrize('left, right, expected', [
    (0, 5, 5),
    (-2, 5.5, 7.5),
    (10, 10, 0),
    (10, np.inf, np.inf),
    (-np.inf, -5, np.inf),
    (-np.inf, np.inf, np.inf),
    (Timedelta('0 days'), Timedelta('5 days'), Timedelta('5 days')),
    (Timedelta('10 days'), Timedelta('10 days'), Timedelta('0 days')),
    (Timedelta('1H10M'), Timedelta('5H5M'), Timedelta('3H55M')),
    (Timedelta('5S'), Timedelta('1H'), Timedelta('59M55S'))])
def test_length(self, left, right, expected):
    """Check Interval.length for numeric and Timedelta endpoints."""
    # GH 18789
    interval = Interval(left, right)
    assert interval.length == expected

@pytest.mark.parametrize('left, right, expected', [
    ('2017-01-01', '2017-01-06', '5 days'),
    ('2017-01-01', '2017-01-01 12:00:00', '12 hours'),
    ('2017-01-01 12:00', '2017-01-01 12:00:00', '0 days'),
    ('2017-01-01 12:01', '2017-01-05 17:31:00', '4 days 5 hours 30 min')])
@pytest.mark.parametrize('tz', (None, 'UTC', 'CET', 'US/Eastern'))
def test_length_timestamp(self, tz, left, right, expected):
    """
    Check that Interval.length between two Timestamps sharing the same
    ``tz`` equals the expected Timedelta.
    """
    # GH 18789
    start = Timestamp(left, tz=tz)
    stop = Timestamp(right, tz=tz)
    duration = Interval(start, stop).length
    assert duration == Timedelta(expected)

@pytest.mark.parametrize('left, right', [
    ('a', 'z'),
    (('a', 'b'), ('c', 'd')),
    (list('AB'), list('ab')),
    (Interval(0, 1), Interval(1, 2))])
def test_length_errors(self, left, right):
    """
    Check that Interval.length raises TypeError for endpoints that do
    not support subtraction.
    """
    # GH 18789
    pattern = 'cannot compute length between .* and .*'
    interval = Interval(left, right)
    with tm.assert_raises_regex(TypeError, pattern):
        interval.length

def test_math_add(self, interval):
expected = Interval(1, 2)
actual = interval + 1
Expand Down