Skip to content

TST: Split tests/indexes/interval/test_interval.py into separate files #19009

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 30, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
369 changes: 1 addition & 368 deletions pandas/tests/indexes/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,11 @@

import pytest
import numpy as np
from datetime import timedelta
from pandas import (
Interval, IntervalIndex, Index, isna, notna, interval_range, Timestamp,
Timedelta, compat, date_range, timedelta_range, DateOffset)
Timedelta, date_range, timedelta_range)
from pandas.compat import lzip
from pandas.core.common import _asarray_tuplesafe
from pandas.tseries.offsets import Day
from pandas._libs.interval import IntervalTree
from pandas.tests.indexes.common import Base
import pandas.util.testing as tm
import pandas as pd
Expand Down Expand Up @@ -1158,367 +1155,3 @@ def test_to_tuples_na(self, tuples, na_tuple):
assert all(isna(x) for x in result_na)
else:
assert isna(result_na)


class TestIntervalRange(object):

def test_construction_from_numeric(self, closed, name):
# combinations of start/end/periods without freq
expected = IntervalIndex.from_breaks(
np.arange(0, 6), name=name, closed=closed)

result = interval_range(start=0, end=5, name=name, closed=closed)
tm.assert_index_equal(result, expected)

result = interval_range(start=0, periods=5, name=name, closed=closed)
tm.assert_index_equal(result, expected)

result = interval_range(end=5, periods=5, name=name, closed=closed)
tm.assert_index_equal(result, expected)

# combinations of start/end/periods with freq
expected = IntervalIndex.from_tuples([(0, 2), (2, 4), (4, 6)],
name=name, closed=closed)

result = interval_range(start=0, end=6, freq=2, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

result = interval_range(start=0, periods=3, freq=2, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

result = interval_range(end=6, periods=3, freq=2, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

# output truncates early if freq causes end to be skipped.
expected = IntervalIndex.from_tuples([(0.0, 1.5), (1.5, 3.0)],
name=name, closed=closed)
result = interval_range(start=0, end=4, freq=1.5, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize('tz', [None, 'US/Eastern'])
def test_construction_from_timestamp(self, closed, name, tz):
# combinations of start/end/periods without freq
start = Timestamp('2017-01-01', tz=tz)
end = Timestamp('2017-01-06', tz=tz)
breaks = date_range(start=start, end=end)
expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

result = interval_range(start=start, end=end, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

result = interval_range(start=start, periods=5, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

result = interval_range(end=end, periods=5, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

# combinations of start/end/periods with fixed freq
freq = '2D'
start = Timestamp('2017-01-01', tz=tz)
end = Timestamp('2017-01-07', tz=tz)
breaks = date_range(start=start, end=end, freq=freq)
expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

result = interval_range(start=start, end=end, freq=freq, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

result = interval_range(start=start, periods=3, freq=freq, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

result = interval_range(end=end, periods=3, freq=freq, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

# output truncates early if freq causes end to be skipped.
end = Timestamp('2017-01-08', tz=tz)
result = interval_range(start=start, end=end, freq=freq, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

# combinations of start/end/periods with non-fixed freq
freq = 'M'
start = Timestamp('2017-01-01', tz=tz)
end = Timestamp('2017-12-31', tz=tz)
breaks = date_range(start=start, end=end, freq=freq)
expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

result = interval_range(start=start, end=end, freq=freq, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

result = interval_range(start=start, periods=11, freq=freq, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

result = interval_range(end=end, periods=11, freq=freq, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

# output truncates early if freq causes end to be skipped.
end = Timestamp('2018-01-15', tz=tz)
result = interval_range(start=start, end=end, freq=freq, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

def test_construction_from_timedelta(self, closed, name):
# combinations of start/end/periods without freq
start, end = Timedelta('1 day'), Timedelta('6 days')
breaks = timedelta_range(start=start, end=end)
expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

result = interval_range(start=start, end=end, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

result = interval_range(start=start, periods=5, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

result = interval_range(end=end, periods=5, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

# combinations of start/end/periods with fixed freq
freq = '2D'
start, end = Timedelta('1 day'), Timedelta('7 days')
breaks = timedelta_range(start=start, end=end, freq=freq)
expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

result = interval_range(start=start, end=end, freq=freq, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

result = interval_range(start=start, periods=3, freq=freq, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

result = interval_range(end=end, periods=3, freq=freq, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

# output truncates early if freq causes end to be skipped.
end = Timedelta('7 days 1 hour')
result = interval_range(start=start, end=end, freq=freq, name=name,
closed=closed)
tm.assert_index_equal(result, expected)

def test_constructor_coverage(self):
# float value for periods
expected = pd.interval_range(start=0, periods=10)
result = pd.interval_range(start=0, periods=10.5)
tm.assert_index_equal(result, expected)

# equivalent timestamp-like start/end
start, end = Timestamp('2017-01-01'), Timestamp('2017-01-15')
expected = pd.interval_range(start=start, end=end)

result = pd.interval_range(start=start.to_pydatetime(),
end=end.to_pydatetime())
tm.assert_index_equal(result, expected)

result = pd.interval_range(start=start.asm8, end=end.asm8)
tm.assert_index_equal(result, expected)

# equivalent freq with timestamp
equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1),
DateOffset(days=1)]
for freq in equiv_freq:
result = pd.interval_range(start=start, end=end, freq=freq)
tm.assert_index_equal(result, expected)

# equivalent timedelta-like start/end
start, end = Timedelta(days=1), Timedelta(days=10)
expected = pd.interval_range(start=start, end=end)

result = pd.interval_range(start=start.to_pytimedelta(),
end=end.to_pytimedelta())
tm.assert_index_equal(result, expected)

result = pd.interval_range(start=start.asm8, end=end.asm8)
tm.assert_index_equal(result, expected)

# equivalent freq with timedelta
equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1)]
for freq in equiv_freq:
result = pd.interval_range(start=start, end=end, freq=freq)
tm.assert_index_equal(result, expected)

def test_errors(self):
# not enough params
msg = ('Of the three parameters: start, end, and periods, '
'exactly two must be specified')

with tm.assert_raises_regex(ValueError, msg):
interval_range(start=0)

with tm.assert_raises_regex(ValueError, msg):
interval_range(end=5)

with tm.assert_raises_regex(ValueError, msg):
interval_range(periods=2)

with tm.assert_raises_regex(ValueError, msg):
interval_range()

# too many params
with tm.assert_raises_regex(ValueError, msg):
interval_range(start=0, end=5, periods=6)

# mixed units
msg = 'start, end, freq need to be type compatible'
with tm.assert_raises_regex(TypeError, msg):
interval_range(start=0, end=Timestamp('20130101'), freq=2)

with tm.assert_raises_regex(TypeError, msg):
interval_range(start=0, end=Timedelta('1 day'), freq=2)

with tm.assert_raises_regex(TypeError, msg):
interval_range(start=0, end=10, freq='D')

with tm.assert_raises_regex(TypeError, msg):
interval_range(start=Timestamp('20130101'), end=10, freq='D')

with tm.assert_raises_regex(TypeError, msg):
interval_range(start=Timestamp('20130101'),
end=Timedelta('1 day'), freq='D')

with tm.assert_raises_regex(TypeError, msg):
interval_range(start=Timestamp('20130101'),
end=Timestamp('20130110'), freq=2)

with tm.assert_raises_regex(TypeError, msg):
interval_range(start=Timedelta('1 day'), end=10, freq='D')

with tm.assert_raises_regex(TypeError, msg):
interval_range(start=Timedelta('1 day'),
end=Timestamp('20130110'), freq='D')

with tm.assert_raises_regex(TypeError, msg):
interval_range(start=Timedelta('1 day'),
end=Timedelta('10 days'), freq=2)

# invalid periods
msg = 'periods must be a number, got foo'
with tm.assert_raises_regex(TypeError, msg):
interval_range(start=0, periods='foo')

# invalid start
msg = 'start must be numeric or datetime-like, got foo'
with tm.assert_raises_regex(ValueError, msg):
interval_range(start='foo', periods=10)

# invalid end
msg = r'end must be numeric or datetime-like, got \(0, 1\]'
with tm.assert_raises_regex(ValueError, msg):
interval_range(end=Interval(0, 1), periods=10)

# invalid freq for datetime-like
msg = 'freq must be numeric or convertible to DateOffset, got foo'
with tm.assert_raises_regex(ValueError, msg):
interval_range(start=0, end=10, freq='foo')

with tm.assert_raises_regex(ValueError, msg):
interval_range(start=Timestamp('20130101'), periods=10, freq='foo')

with tm.assert_raises_regex(ValueError, msg):
interval_range(end=Timedelta('1 day'), periods=10, freq='foo')

# mixed tz
start = Timestamp('2017-01-01', tz='US/Eastern')
end = Timestamp('2017-01-07', tz='US/Pacific')
msg = 'Start and end cannot both be tz-aware with different timezones'
with tm.assert_raises_regex(TypeError, msg):
interval_range(start=start, end=end)


class TestIntervalTree(object):
def setup_method(self, method):
gentree = lambda dtype: IntervalTree(np.arange(5, dtype=dtype),
np.arange(5, dtype=dtype) + 2)
self.tree = gentree('int64')
self.trees = {dtype: gentree(dtype)
for dtype in ['int32', 'int64', 'float32', 'float64']}

def test_get_loc(self):
for dtype, tree in self.trees.items():
tm.assert_numpy_array_equal(tree.get_loc(1),
np.array([0], dtype='int64'))
tm.assert_numpy_array_equal(np.sort(tree.get_loc(2)),
np.array([0, 1], dtype='int64'))
with pytest.raises(KeyError):
tree.get_loc(-1)

def test_get_indexer(self):
for dtype, tree in self.trees.items():
tm.assert_numpy_array_equal(
tree.get_indexer(np.array([1.0, 5.5, 6.5])),
np.array([0, 4, -1], dtype='int64'))
with pytest.raises(KeyError):
tree.get_indexer(np.array([3.0]))

def test_get_indexer_non_unique(self):
indexer, missing = self.tree.get_indexer_non_unique(
np.array([1.0, 2.0, 6.5]))
tm.assert_numpy_array_equal(indexer[:1],
np.array([0], dtype='int64'))
tm.assert_numpy_array_equal(np.sort(indexer[1:3]),
np.array([0, 1], dtype='int64'))
tm.assert_numpy_array_equal(np.sort(indexer[3:]),
np.array([-1], dtype='int64'))
tm.assert_numpy_array_equal(missing, np.array([2], dtype='int64'))

def test_duplicates(self):
tree = IntervalTree([0, 0, 0], [1, 1, 1])
tm.assert_numpy_array_equal(np.sort(tree.get_loc(0.5)),
np.array([0, 1, 2], dtype='int64'))

with pytest.raises(KeyError):
tree.get_indexer(np.array([0.5]))

indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))
tm.assert_numpy_array_equal(np.sort(indexer),
np.array([0, 1, 2], dtype='int64'))
tm.assert_numpy_array_equal(missing, np.array([], dtype='int64'))

def test_get_loc_closed(self):
for closed in ['left', 'right', 'both', 'neither']:
tree = IntervalTree([0], [1], closed=closed)
for p, errors in [(0, tree.open_left),
(1, tree.open_right)]:
if errors:
with pytest.raises(KeyError):
tree.get_loc(p)
else:
tm.assert_numpy_array_equal(tree.get_loc(p),
np.array([0], dtype='int64'))

@pytest.mark.skipif(compat.is_platform_32bit(),
reason="int type mismatch on 32bit")
def test_get_indexer_closed(self):
x = np.arange(1000, dtype='float64')
found = x.astype('intp')
not_found = (-1 * np.ones(1000)).astype('intp')

for leaf_size in [1, 10, 100, 10000]:
for closed in ['left', 'right', 'both', 'neither']:
tree = IntervalTree(x, x + 0.5, closed=closed,
leaf_size=leaf_size)
tm.assert_numpy_array_equal(found,
tree.get_indexer(x + 0.25))

expected = found if tree.closed_left else not_found
tm.assert_numpy_array_equal(expected,
tree.get_indexer(x + 0.0))

expected = found if tree.closed_right else not_found
tm.assert_numpy_array_equal(expected,
tree.get_indexer(x + 0.5))
Loading