From 71a5bc270425371766833af9e2f228c22fad7400 Mon Sep 17 00:00:00 2001 From: jschendel Date: Sat, 30 Dec 2017 15:28:41 -0700 Subject: [PATCH] TST: Split tests/indexes/interval/test_interval.py into separate files --- .../tests/indexes/interval/test_interval.py | 369 +----------------- .../indexes/interval/test_interval_range.py | 301 ++++++++++++++ .../indexes/interval/test_interval_tree.py | 93 +++++ 3 files changed, 395 insertions(+), 368 deletions(-) create mode 100644 pandas/tests/indexes/interval/test_interval_range.py create mode 100644 pandas/tests/indexes/interval/test_interval_tree.py diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 3ca4c31b7f059..73520e984ae12 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -2,14 +2,11 @@ import pytest import numpy as np -from datetime import timedelta from pandas import ( Interval, IntervalIndex, Index, isna, notna, interval_range, Timestamp, - Timedelta, compat, date_range, timedelta_range, DateOffset) + Timedelta, date_range, timedelta_range) from pandas.compat import lzip from pandas.core.common import _asarray_tuplesafe -from pandas.tseries.offsets import Day -from pandas._libs.interval import IntervalTree from pandas.tests.indexes.common import Base import pandas.util.testing as tm import pandas as pd @@ -1158,367 +1155,3 @@ def test_to_tuples_na(self, tuples, na_tuple): assert all(isna(x) for x in result_na) else: assert isna(result_na) - - -class TestIntervalRange(object): - - def test_construction_from_numeric(self, closed, name): - # combinations of start/end/periods without freq - expected = IntervalIndex.from_breaks( - np.arange(0, 6), name=name, closed=closed) - - result = interval_range(start=0, end=5, name=name, closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(start=0, periods=5, name=name, closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(end=5, periods=5, name=name, closed=closed) - tm.assert_index_equal(result, expected) - - # combinations of start/end/periods with freq - expected = IntervalIndex.from_tuples([(0, 2), (2, 4), (4, 6)], - name=name, closed=closed) - - result = interval_range(start=0, end=6, freq=2, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(start=0, periods=3, freq=2, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(end=6, periods=3, freq=2, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - # output truncates early if freq causes end to be skipped. - expected = IntervalIndex.from_tuples([(0.0, 1.5), (1.5, 3.0)], - name=name, closed=closed) - result = interval_range(start=0, end=4, freq=1.5, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - @pytest.mark.parametrize('tz', [None, 'US/Eastern']) - def test_construction_from_timestamp(self, closed, name, tz): - # combinations of start/end/periods without freq - start = Timestamp('2017-01-01', tz=tz) - end = Timestamp('2017-01-06', tz=tz) - breaks = date_range(start=start, end=end) - expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) - - result = interval_range(start=start, end=end, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(start=start, periods=5, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(end=end, periods=5, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - # combinations of start/end/periods with fixed freq - freq = '2D' - start = Timestamp('2017-01-01', tz=tz) - end = Timestamp('2017-01-07', tz=tz) - breaks = date_range(start=start, end=end, freq=freq) - expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) - - result = interval_range(start=start, end=end, freq=freq, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(start=start, periods=3, freq=freq, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(end=end, periods=3, freq=freq, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - # output truncates early if freq causes end to be skipped. - end = Timestamp('2017-01-08', tz=tz) - result = interval_range(start=start, end=end, freq=freq, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - # combinations of start/end/periods with non-fixed freq - freq = 'M' - start = Timestamp('2017-01-01', tz=tz) - end = Timestamp('2017-12-31', tz=tz) - breaks = date_range(start=start, end=end, freq=freq) - expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) - - result = interval_range(start=start, end=end, freq=freq, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(start=start, periods=11, freq=freq, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(end=end, periods=11, freq=freq, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - # output truncates early if freq causes end to be skipped. - end = Timestamp('2018-01-15', tz=tz) - result = interval_range(start=start, end=end, freq=freq, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - def test_construction_from_timedelta(self, closed, name): - # combinations of start/end/periods without freq - start, end = Timedelta('1 day'), Timedelta('6 days') - breaks = timedelta_range(start=start, end=end) - expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) - - result = interval_range(start=start, end=end, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(start=start, periods=5, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(end=end, periods=5, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - # combinations of start/end/periods with fixed freq - freq = '2D' - start, end = Timedelta('1 day'), Timedelta('7 days') - breaks = timedelta_range(start=start, end=end, freq=freq) - expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) - - result = interval_range(start=start, end=end, freq=freq, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(start=start, periods=3, freq=freq, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(end=end, periods=3, freq=freq, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - # output truncates early if freq causes end to be skipped. - end = Timedelta('7 days 1 hour') - result = interval_range(start=start, end=end, freq=freq, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - def test_constructor_coverage(self): - # float value for periods - expected = pd.interval_range(start=0, periods=10) - result = pd.interval_range(start=0, periods=10.5) - tm.assert_index_equal(result, expected) - - # equivalent timestamp-like start/end - start, end = Timestamp('2017-01-01'), Timestamp('2017-01-15') - expected = pd.interval_range(start=start, end=end) - - result = pd.interval_range(start=start.to_pydatetime(), - end=end.to_pydatetime()) - tm.assert_index_equal(result, expected) - - result = pd.interval_range(start=start.asm8, end=end.asm8) - tm.assert_index_equal(result, expected) - - # equivalent freq with timestamp - equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1), - DateOffset(days=1)] - for freq in equiv_freq: - result = pd.interval_range(start=start, end=end, freq=freq) - tm.assert_index_equal(result, expected) - - # equivalent timedelta-like start/end - start, end = Timedelta(days=1), Timedelta(days=10) - expected = pd.interval_range(start=start, end=end) - - result = pd.interval_range(start=start.to_pytimedelta(), - end=end.to_pytimedelta()) - tm.assert_index_equal(result, expected) - - result = pd.interval_range(start=start.asm8, end=end.asm8) - tm.assert_index_equal(result, expected) - - # equivalent freq with timedelta - equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1)] - for freq in equiv_freq: - result = pd.interval_range(start=start, end=end, freq=freq) - tm.assert_index_equal(result, expected) - - def test_errors(self): - # not enough params - msg = ('Of the three parameters: start, end, and periods, ' - 'exactly two must be specified') - - with tm.assert_raises_regex(ValueError, msg): - interval_range(start=0) - - with tm.assert_raises_regex(ValueError, msg): - interval_range(end=5) - - with tm.assert_raises_regex(ValueError, msg): - interval_range(periods=2) - - with tm.assert_raises_regex(ValueError, msg): - interval_range() - - # too many params - with tm.assert_raises_regex(ValueError, msg): - interval_range(start=0, end=5, periods=6) - - # mixed units - msg = 'start, end, freq need to be type compatible' - with tm.assert_raises_regex(TypeError, msg): - interval_range(start=0, end=Timestamp('20130101'), freq=2) - - with tm.assert_raises_regex(TypeError, msg): - interval_range(start=0, end=Timedelta('1 day'), freq=2) - - with tm.assert_raises_regex(TypeError, msg): - interval_range(start=0, end=10, freq='D') - - with tm.assert_raises_regex(TypeError, msg): - interval_range(start=Timestamp('20130101'), end=10, freq='D') - - with tm.assert_raises_regex(TypeError, msg): - interval_range(start=Timestamp('20130101'), - end=Timedelta('1 day'), freq='D') - - with tm.assert_raises_regex(TypeError, msg): - interval_range(start=Timestamp('20130101'), - end=Timestamp('20130110'), freq=2) - - with tm.assert_raises_regex(TypeError, msg): - interval_range(start=Timedelta('1 day'), end=10, freq='D') - - with tm.assert_raises_regex(TypeError, msg): - interval_range(start=Timedelta('1 day'), - end=Timestamp('20130110'), freq='D') - - with tm.assert_raises_regex(TypeError, msg): - interval_range(start=Timedelta('1 day'), - end=Timedelta('10 days'), freq=2) - - # invalid periods - msg = 'periods must be a number, got foo' - with tm.assert_raises_regex(TypeError, msg): - interval_range(start=0, periods='foo') - - # invalid start - msg = 'start must be numeric or datetime-like, got foo' - with tm.assert_raises_regex(ValueError, msg): - interval_range(start='foo', periods=10) - - # invalid end - msg = r'end must be numeric or datetime-like, got \(0, 1\]' - with tm.assert_raises_regex(ValueError, msg): - interval_range(end=Interval(0, 1), periods=10) - - # invalid freq for datetime-like - msg = 'freq must be numeric or convertible to DateOffset, got foo' - with tm.assert_raises_regex(ValueError, msg): - interval_range(start=0, end=10, freq='foo') - - with tm.assert_raises_regex(ValueError, msg): - interval_range(start=Timestamp('20130101'), periods=10, freq='foo') - - with tm.assert_raises_regex(ValueError, msg): - interval_range(end=Timedelta('1 day'), periods=10, freq='foo') - - # mixed tz - start = Timestamp('2017-01-01', tz='US/Eastern') - end = Timestamp('2017-01-07', tz='US/Pacific') - msg = 'Start and end cannot both be tz-aware with different timezones' - with tm.assert_raises_regex(TypeError, msg): - interval_range(start=start, end=end) - - -class TestIntervalTree(object): - def setup_method(self, method): - gentree = lambda dtype: IntervalTree(np.arange(5, dtype=dtype), - np.arange(5, dtype=dtype) + 2) - self.tree = gentree('int64') - self.trees = {dtype: gentree(dtype) - for dtype in ['int32', 'int64', 'float32', 'float64']} - - def test_get_loc(self): - for dtype, tree in self.trees.items(): - tm.assert_numpy_array_equal(tree.get_loc(1), - np.array([0], dtype='int64')) - tm.assert_numpy_array_equal(np.sort(tree.get_loc(2)), - np.array([0, 1], dtype='int64')) - with pytest.raises(KeyError): - tree.get_loc(-1) - - def test_get_indexer(self): - for dtype, tree in self.trees.items(): - tm.assert_numpy_array_equal( - tree.get_indexer(np.array([1.0, 5.5, 6.5])), - np.array([0, 4, -1], dtype='int64')) - with pytest.raises(KeyError): - tree.get_indexer(np.array([3.0])) - - def test_get_indexer_non_unique(self): - indexer, missing = self.tree.get_indexer_non_unique( - np.array([1.0, 2.0, 6.5])) - tm.assert_numpy_array_equal(indexer[:1], - np.array([0], dtype='int64')) - tm.assert_numpy_array_equal(np.sort(indexer[1:3]), - np.array([0, 1], dtype='int64')) - tm.assert_numpy_array_equal(np.sort(indexer[3:]), - np.array([-1], dtype='int64')) - tm.assert_numpy_array_equal(missing, np.array([2], dtype='int64')) - - def test_duplicates(self): - tree = IntervalTree([0, 0, 0], [1, 1, 1]) - tm.assert_numpy_array_equal(np.sort(tree.get_loc(0.5)), - np.array([0, 1, 2], dtype='int64')) - - with pytest.raises(KeyError): - tree.get_indexer(np.array([0.5])) - - indexer, missing = tree.get_indexer_non_unique(np.array([0.5])) - tm.assert_numpy_array_equal(np.sort(indexer), - np.array([0, 1, 2], dtype='int64')) - tm.assert_numpy_array_equal(missing, np.array([], dtype='int64')) - - def test_get_loc_closed(self): - for closed in ['left', 'right', 'both', 'neither']: - tree = IntervalTree([0], [1], closed=closed) - for p, errors in [(0, tree.open_left), - (1, tree.open_right)]: - if errors: - with pytest.raises(KeyError): - tree.get_loc(p) - else: - tm.assert_numpy_array_equal(tree.get_loc(p), - np.array([0], dtype='int64')) - - @pytest.mark.skipif(compat.is_platform_32bit(), - reason="int type mismatch on 32bit") - def test_get_indexer_closed(self): - x = np.arange(1000, dtype='float64') - found = x.astype('intp') - not_found = (-1 * np.ones(1000)).astype('intp') - - for leaf_size in [1, 10, 100, 10000]: - for closed in ['left', 'right', 'both', 'neither']: - tree = IntervalTree(x, x + 0.5, closed=closed, - leaf_size=leaf_size) - tm.assert_numpy_array_equal(found, - tree.get_indexer(x + 0.25)) - - expected = found if tree.closed_left else not_found - tm.assert_numpy_array_equal(expected, - tree.get_indexer(x + 0.0)) - - expected = found if tree.closed_right else not_found - tm.assert_numpy_array_equal(expected, - tree.get_indexer(x + 0.5)) diff --git a/pandas/tests/indexes/interval/test_interval_range.py b/pandas/tests/indexes/interval/test_interval_range.py new file mode 100644 index 0000000000000..203e8e3128edc --- /dev/null +++ b/pandas/tests/indexes/interval/test_interval_range.py @@ -0,0 +1,301 @@ +from __future__ import division + +import pytest +import numpy as np +from datetime import timedelta +from pandas import ( + Interval, IntervalIndex, Timestamp, Timedelta, DateOffset, + interval_range, date_range, timedelta_range) +from pandas.tseries.offsets import Day +import pandas.util.testing as tm +import pandas as pd + + +@pytest.fixture(scope='class', params=['left', 'right', 'both', 'neither']) +def closed(request): + return request.param + + +@pytest.fixture(scope='class', params=[None, 'foo']) +def name(request): + return request.param + + +class TestIntervalRange(object): + + def test_construction_from_numeric(self, closed, name): + # combinations of start/end/periods without freq + expected = IntervalIndex.from_breaks( + np.arange(0, 6), name=name, closed=closed) + + result = interval_range(start=0, end=5, name=name, closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=0, periods=5, name=name, closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=5, periods=5, name=name, closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with freq + expected = IntervalIndex.from_tuples([(0, 2), (2, 4), (4, 6)], + name=name, closed=closed) + + result = interval_range(start=0, end=6, freq=2, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=0, periods=3, freq=2, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=6, periods=3, freq=2, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. + expected = IntervalIndex.from_tuples([(0.0, 1.5), (1.5, 3.0)], + name=name, closed=closed) + result = interval_range(start=0, end=4, freq=1.5, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('tz', [None, 'US/Eastern']) + def test_construction_from_timestamp(self, closed, name, tz): + # combinations of start/end/periods without freq + start = Timestamp('2017-01-01', tz=tz) + end = Timestamp('2017-01-06', tz=tz) + breaks = date_range(start=start, end=end) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + result = interval_range(start=start, end=end, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=5, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=5, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with fixed freq + freq = '2D' + start = Timestamp('2017-01-01', tz=tz) + end = Timestamp('2017-01-07', tz=tz) + breaks = date_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=3, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=3, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. + end = Timestamp('2017-01-08', tz=tz) + result = interval_range(start=start, end=end, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with non-fixed freq + freq = 'M' + start = Timestamp('2017-01-01', tz=tz) + end = Timestamp('2017-12-31', tz=tz) + breaks = date_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=11, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=11, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. + end = Timestamp('2018-01-15', tz=tz) + result = interval_range(start=start, end=end, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + def test_construction_from_timedelta(self, closed, name): + # combinations of start/end/periods without freq + start, end = Timedelta('1 day'), Timedelta('6 days') + breaks = timedelta_range(start=start, end=end) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + result = interval_range(start=start, end=end, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=5, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=5, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with fixed freq + freq = '2D' + start, end = Timedelta('1 day'), Timedelta('7 days') + breaks = timedelta_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=3, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=3, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. + end = Timedelta('7 days 1 hour') + result = interval_range(start=start, end=end, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + def test_constructor_coverage(self): + # float value for periods + expected = pd.interval_range(start=0, periods=10) + result = pd.interval_range(start=0, periods=10.5) + tm.assert_index_equal(result, expected) + + # equivalent timestamp-like start/end + start, end = Timestamp('2017-01-01'), Timestamp('2017-01-15') + expected = pd.interval_range(start=start, end=end) + + result = pd.interval_range(start=start.to_pydatetime(), + end=end.to_pydatetime()) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timestamp + equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1), + DateOffset(days=1)] + for freq in equiv_freq: + result = pd.interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + + # equivalent timedelta-like start/end + start, end = Timedelta(days=1), Timedelta(days=10) + expected = pd.interval_range(start=start, end=end) + + result = pd.interval_range(start=start.to_pytimedelta(), + end=end.to_pytimedelta()) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timedelta + equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1)] + for freq in equiv_freq: + result = pd.interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=0) + + with tm.assert_raises_regex(ValueError, msg): + interval_range(end=5) + + with tm.assert_raises_regex(ValueError, msg): + interval_range(periods=2) + + with tm.assert_raises_regex(ValueError, msg): + interval_range() + + # too many params + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=0, end=5, periods=6) + + # mixed units + msg = 'start, end, freq need to be type compatible' + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=Timestamp('20130101'), freq=2) + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=Timedelta('1 day'), freq=2) + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=10, freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), end=10, freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), + end=Timedelta('1 day'), freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), + end=Timestamp('20130110'), freq=2) + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), end=10, freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), + end=Timestamp('20130110'), freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), + end=Timedelta('10 days'), freq=2) + + # invalid periods + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, periods='foo') + + # invalid start + msg = 'start must be numeric or datetime-like, got foo' + with tm.assert_raises_regex(ValueError, msg): + interval_range(start='foo', periods=10) + + # invalid end + msg = r'end must be numeric or datetime-like, got \(0, 1\]' + with tm.assert_raises_regex(ValueError, msg): + interval_range(end=Interval(0, 1), periods=10) + + # invalid freq for datetime-like + msg = 'freq must be numeric or convertible to DateOffset, got foo' + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=0, end=10, freq='foo') + + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=Timestamp('20130101'), periods=10, freq='foo') + + with tm.assert_raises_regex(ValueError, msg): + interval_range(end=Timedelta('1 day'), periods=10, freq='foo') + + # mixed tz + start = Timestamp('2017-01-01', tz='US/Eastern') + end = Timestamp('2017-01-07', tz='US/Pacific') + msg = 'Start and end cannot both be tz-aware with different timezones' + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=start, end=end) diff --git a/pandas/tests/indexes/interval/test_interval_tree.py b/pandas/tests/indexes/interval/test_interval_tree.py new file mode 100644 index 0000000000000..343131125f640 --- /dev/null +++ b/pandas/tests/indexes/interval/test_interval_tree.py @@ -0,0 +1,93 @@ +from __future__ import division + +import pytest +import numpy as np +from pandas import compat +from pandas._libs.interval import IntervalTree +import pandas.util.testing as tm + + +@pytest.fixture(scope='class', params=['left', 'right', 'both', 'neither']) +def closed(request): + return request.param + + +class TestIntervalTree(object): + def setup_method(self, method): + def gentree(dtype): + left = np.arange(5, dtype=dtype) + right = left + 2 + return IntervalTree(left, right) + + self.tree = gentree('int64') + self.trees = {dtype: gentree(dtype) + for dtype in ['int32', 'int64', 'float32', 'float64']} + + def test_get_loc(self): + for dtype, tree in self.trees.items(): + tm.assert_numpy_array_equal(tree.get_loc(1), + np.array([0], dtype='int64')) + tm.assert_numpy_array_equal(np.sort(tree.get_loc(2)), + np.array([0, 1], dtype='int64')) + with pytest.raises(KeyError): + tree.get_loc(-1) + + def test_get_indexer(self): + for dtype, tree in self.trees.items(): + tm.assert_numpy_array_equal( + tree.get_indexer(np.array([1.0, 5.5, 6.5])), + np.array([0, 4, -1], dtype='int64')) + with pytest.raises(KeyError): + tree.get_indexer(np.array([3.0])) + + def test_get_indexer_non_unique(self): + indexer, missing = self.tree.get_indexer_non_unique( + np.array([1.0, 2.0, 6.5])) + tm.assert_numpy_array_equal(indexer[:1], + np.array([0], dtype='int64')) + tm.assert_numpy_array_equal(np.sort(indexer[1:3]), + np.array([0, 1], dtype='int64')) + tm.assert_numpy_array_equal(np.sort(indexer[3:]), + np.array([-1], dtype='int64')) + tm.assert_numpy_array_equal(missing, np.array([2], dtype='int64')) + + def test_duplicates(self): + tree = IntervalTree([0, 0, 0], [1, 1, 1]) + tm.assert_numpy_array_equal(np.sort(tree.get_loc(0.5)), + np.array([0, 1, 2], dtype='int64')) + + with pytest.raises(KeyError): + tree.get_indexer(np.array([0.5])) + + indexer, missing = tree.get_indexer_non_unique(np.array([0.5])) + tm.assert_numpy_array_equal(np.sort(indexer), + np.array([0, 1, 2], dtype='int64')) + tm.assert_numpy_array_equal(missing, np.array([], dtype='int64')) + + def test_get_loc_closed(self, closed): + tree = IntervalTree([0], [1], closed=closed) + for p, errors in [(0, tree.open_left), + (1, tree.open_right)]: + if errors: + with pytest.raises(KeyError): + tree.get_loc(p) + else: + tm.assert_numpy_array_equal(tree.get_loc(p), + np.array([0], dtype='int64')) + + @pytest.mark.skipif(compat.is_platform_32bit(), + reason="int type mismatch on 32bit") + @pytest.mark.parametrize('leaf_size', [1, 10, 100, 10000]) + def test_get_indexer_closed(self, closed, leaf_size): + x = np.arange(1000, dtype='float64') + found = x.astype('intp') + not_found = (-1 * np.ones(1000)).astype('intp') + + tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size) + tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25)) + + expected = found if tree.closed_left else not_found + tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0)) + + expected = found if tree.closed_right else not_found + tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5))