Skip to content

[ENH] Move intersection functions for DatetimeIndex and TimedeltaIndex to Datetimelike and added tests #25121

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Other Enhancements
^^^^^^^^^^^^^^^^^^

- :meth:`Timestamp.replace` now supports the ``fold`` argument to disambiguate DST transition times (:issue:`25017`)
-
- :meth:`DatetimeIndex.intersection` and :meth:`TimedeltaIndex.intersection` have been moved to :meth:`Datetimelike.intersection` and tests were added for :meth:`TimedeltaIndex.intersection` (:issue:`24966`).
-

.. _whatsnew_0250.api_breaking:
Expand Down
78 changes: 78 additions & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from pandas.core.tools.timedeltas import to_timedelta

import pandas.io.formats.printing as printing
from pandas.tseries.frequencies import to_offset

_index_doc_kwargs = dict(ibase._index_doc_kwargs)

Expand Down Expand Up @@ -530,6 +531,83 @@ def isin(self, values):

return algorithms.isin(self.asi8, values.asi8)

def intersection(self, other, sort=False):
    """
    Specialized intersection for DatetimeIndex and TimedeltaIndex objects.
    May be much faster than Index.intersection.

    Parameters
    ----------
    other : DatetimeIndex or TimedeltaIndex or array-like
    sort : False or None, default False
        Sort the resulting index if possible.

        .. versionadded:: 0.24.0

        .. versionchanged:: 0.24.1

           Changed the default to ``False`` to match the behaviour
           from before 0.24.0.

    Returns
    -------
    y : Index or DatetimeIndex or TimedeltaIndex
    """
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)

    if self.equals(other):
        return self._get_reconciled_name_object(other)

    if not isinstance(other, type(self)):
        # Try to coerce ``other`` to our own index class. The class has to
        # be looked up with ``type(self)``: calling the instance itself
        # (``self(other)``) raises TypeError, which the handler below would
        # swallow, so the coercion would silently never happen.
        try:
            other = type(self)(other)
        except (TypeError, ValueError):
            # Best effort only: fall back to the generic intersection
            # with ``other`` left as it was passed in.
            pass
        result = Index.intersection(self, other, sort=sort)
        if isinstance(result, type(self)):
            if result.freq is None:
                result.freq = to_offset(result.inferred_freq)
        return result

    elif (other.freq is None or self.freq is None or
          other.freq != self.freq or
          not other.freq.isAnchored() or
          (not self.is_monotonic or not other.is_monotonic)):
        # The slicing shortcut below needs two monotonic indexes sharing
        # the same anchored freq; otherwise use the generic algorithm.
        result = Index.intersection(self, other, sort=sort)
        # Invalidate the freq of `result`, which may not be correct at
        # this point, depending on the values.
        result.freq = None
        if hasattr(self, 'tz'):
            # Only indexes exposing ``tz`` get the ``tz`` keyword.
            result = self._shallow_copy(result._values, name=result.name,
                                        tz=result.tz, freq=None)
        else:
            result = self._shallow_copy(result._values, name=result.name,
                                        freq=None)
        if result.freq is None:
            # Re-derive the freq from the values that actually remain.
            result.freq = to_offset(result.inferred_freq)
        return result

    if len(self) == 0:
        return self
    if len(other) == 0:
        return other
    # to make our life easier, "sort" the two ranges
    if self[0] <= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    end = min(left[-1], right[-1])
    start = right[0]

    if end < start:
        # The two ranges do not overlap at all.
        return type(self)(data=[])
    else:
        # The overlap is a contiguous slice of ``left``.
        lslice = slice(*left.slice_locs(start, end))
        left_chunk = left.values[lslice]
        return self._shallow_copy(left_chunk)

@Appender(_index_shared_docs['repeat'] % _index_doc_kwargs)
def repeat(self, repeats, axis=None):
nv.validate_repeat(tuple(), dict(axis=axis))
Expand Down
73 changes: 0 additions & 73 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,79 +594,6 @@ def _wrap_setop_result(self, other, result):
name = get_op_result_name(self, other)
return self._shallow_copy(result, name=name, freq=None, tz=self.tz)

def intersection(self, other, sort=False):
    """
    Compute the intersection of this DatetimeIndex with ``other``.

    This specialization may be much faster than Index.intersection.

    Parameters
    ----------
    other : DatetimeIndex or array-like
    sort : False or None, default False
        Sort the resulting index if possible.

        .. versionadded:: 0.24.0

        .. versionchanged:: 0.24.1

           Changed the default to ``False`` to match the behaviour
           from before 0.24.0.

    Returns
    -------
    y : Index or DatetimeIndex
    """
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)

    if self.equals(other):
        return self._get_reconciled_name_object(other)

    if not isinstance(other, DatetimeIndex):
        # Best-effort coercion; on failure ``other`` is used as given.
        try:
            other = DatetimeIndex(other)
        except (TypeError, ValueError):
            pass
        generic = Index.intersection(self, other, sort=sort)
        if isinstance(generic, DatetimeIndex) and generic.freq is None:
            generic.freq = to_offset(generic.inferred_freq)
        return generic

    # The slicing shortcut further down is only taken when both indexes
    # are monotonic and share the same anchored freq.
    needs_generic = (
        other.freq is None
        or self.freq is None
        or other.freq != self.freq
        or not other.freq.isAnchored()
        or not (self.is_monotonic and other.is_monotonic)
    )
    if needs_generic:
        generic = Index.intersection(self, other, sort=sort)
        # The freq carried by the generic result may not match its
        # values, so drop it before rebuilding.
        generic.freq = None
        rebuilt = self._shallow_copy(generic._values, name=generic.name,
                                     tz=generic.tz, freq=None)
        if rebuilt.freq is None:
            rebuilt.freq = to_offset(rebuilt.inferred_freq)
        return rebuilt

    if len(self) == 0:
        return self
    if len(other) == 0:
        return other

    # Order the two ranges so that ``left`` starts first.
    left, right = (self, other) if self[0] <= other[0] else (other, self)

    end = min(left[-1], right[-1])
    start = right[0]

    if end < start:
        # No overlap between the two ranges.
        return type(self)(data=[])

    lo, hi = left.slice_locs(start, end)
    return self._shallow_copy(left.values[slice(lo, hi)])

# --------------------------------------------------------------------

def _get_time_micros(self):
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,9 @@ def join(self, other, how='left', level=None, return_indexers=False,
return self._apply_meta(result), lidx, ridx
return self._apply_meta(result)

def intersection(self, other, sort=False):
    """
    Intersect with ``other`` by calling ``Index.intersection`` directly.

    Parameters
    ----------
    other : Index or array-like
    sort : False or None, default False
        Forwarded unchanged to ``Index.intersection``.

    Returns
    -------
    Index
        Whatever ``Index.intersection`` returns for these arguments.
    """
    # Call the base implementation explicitly rather than via ``super()``.
    generic_intersection = Index.intersection
    return generic_intersection(self, other, sort=sort)

def _assert_can_do_setop(self, other):
super(PeriodIndex, self)._assert_can_do_setop(other)

Expand Down
46 changes: 0 additions & 46 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,52 +439,6 @@ def _fast_union(self, other):
else:
return left

def intersection(self, other):
    """
    Compute the intersection of this TimedeltaIndex with ``other``.

    This specialization may be much faster than Index.intersection.

    Parameters
    ----------
    other : TimedeltaIndex or array-like

    Returns
    -------
    y : Index or TimedeltaIndex
    """
    self._assert_can_do_setop(other)

    if self.equals(other):
        return self._get_reconciled_name_object(other)

    if not isinstance(other, TimedeltaIndex):
        # Best-effort coercion; on failure ``other`` is used as given and
        # the generic implementation does the work.
        try:
            other = TimedeltaIndex(other)
        except (TypeError, ValueError):
            pass
        return Index.intersection(self, other)

    if len(self) == 0:
        return self
    if len(other) == 0:
        return other

    # Order the two ranges so that ``left`` starts first.
    left, right = (self, other) if self[0] <= other[0] else (other, self)

    end = min(left[-1], right[-1])
    start = right[0]

    if end < start:
        # No overlap between the two ranges.
        return type(self)(data=[])

    lo, hi = left.slice_locs(start, end)
    return self._shallow_copy(left.values[slice(lo, hi)])

def _maybe_promote(self, other):
if other.inferred_type == 'timedelta':
other = TimedeltaIndex(other)
Expand Down
78 changes: 78 additions & 0 deletions pandas/tests/indexes/timedeltas/test_setops.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import pytest

import pandas as pd
from pandas import Int64Index, TimedeltaIndex, timedelta_range
Expand Down Expand Up @@ -73,3 +74,80 @@ def test_intersection_bug_1708(self):
result = index_1 & index_2
expected = timedelta_range('1 day 01:00:00', periods=3, freq='h')
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("sort", [None, False])
def test_intersection_equal(self, sort):
    # Intersecting two equal indexes must give back the same contents.
    left = timedelta_range('1 day', periods=4, freq='h')
    right = timedelta_range('1 day', periods=4, freq='h')
    result = left.intersection(right, sort=sort)
    if sort is None:
        tm.assert_index_equal(result, right.sort_values())
    assert tm.equalContents(result, right)

    # Corner case: intersecting an index with itself returns that very
    # same object.
    self_result = left.intersection(left, sort=sort)
    assert self_result is left

@pytest.mark.parametrize("sort", [None, False])
def test_intersection_zero_length(self, sort):
    # Intersecting with an empty index yields the empty index, whichever
    # side the empty index is on.
    populated = timedelta_range('1 day', periods=4, freq='h')
    empty = timedelta_range('1 day', periods=0, freq='h')
    for lhs, rhs in [(populated, empty), (empty, populated)]:
        result = lhs.intersection(rhs, sort=sort)
        tm.assert_index_equal(empty, result)

@pytest.mark.parametrize("sort", [None, False])
def test_intersection(self, sort):
    # GH 4690
    # Name and freq handling when intersecting monotonic ranges that share
    # the same freq.
    base = timedelta_range('1 day', periods=4, freq='h', name='idx')

    # if target has the same name, it is preserved
    rng2 = timedelta_range('1 day', periods=5, freq='h', name='idx')
    expected2 = timedelta_range('1 day', periods=4, freq='h', name='idx')

    # if target name is different, it will be reset
    rng3 = timedelta_range('1 day', periods=5, freq='h', name='other')
    expected3 = timedelta_range('1 day', periods=4, freq='h', name=None)

    # no overlap at all: the result is empty
    rng4 = timedelta_range('1 day', periods=10, freq='h', name='idx')[5:]
    expected4 = TimedeltaIndex([], name='idx')

    for (rng, expected) in [(rng2, expected2), (rng3, expected3),
                            (rng4, expected4)]:
        # Forward the parametrized ``sort``; without it both
        # parametrizations would exercise exactly the same call.
        result = base.intersection(rng, sort=sort)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freq == expected.freq

@pytest.mark.parametrize("sort", [None, False])
def test_intersection_non_monotonic(self, sort):
    # Intersection of non-monotonic indexes.
    # The ``test_`` prefix is required for pytest to collect this method.
    base = TimedeltaIndex(['1 hour', '2 hour',
                           '4 hour', '3 hour'],
                          name='idx')

    # same name: the name is preserved
    rng2 = TimedeltaIndex(['5 hour', '2 hour',
                           '4 hour', '9 hour'],
                          name='idx')
    expected2 = TimedeltaIndex(['2 hour', '4 hour'],
                               name='idx')

    # different name: the name is reset
    rng3 = TimedeltaIndex(['2 hour', '5 hour',
                           '5 hour', '1 hour'],
                          name='other')
    expected3 = TimedeltaIndex(['1 hour', '2 hour'],
                               name=None)

    # same values in reversed order
    rng4 = base[::-1]
    expected4 = base

    hourly = timedelta_range('1 hour', periods=4, freq='h').freq

    for (rng, expected) in [(rng2, expected2), (rng3, expected3),
                            (rng4, expected4)]:
        result = base.intersection(rng, sort=sort)
        if sort is None:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        if rng is rng4 and sort is None:
            # The sorted result is a regular hourly sequence and the
            # freq is re-inferred from the resulting values, so it is
            # not None in this one case.
            assert result.freq == hourly
        else:
            assert result.freq is None