Skip to content

ENH: Add sort parameter to RangeIndex.union (#24471) #25788

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Other Enhancements
- ``Series.str`` has gained :meth:`Series.str.casefold` method to remove all case distinctions present in a string (:issue:`25405`)
- :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`)
- :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`)
- :meth:`RangeIndex.union` now supports the ``sort`` argument. If ``sort=False`` an unsorted ``Int64Index`` is always returned. ``sort=None`` is the default and returns a monotonically increasing ``RangeIndex`` if possible or a sorted ``Int64Index`` if not (:issue:`24471`)
- :meth:`DataFrame.rename` now supports the ``errors`` argument to raise errors when attempting to rename nonexistent keys (:issue:`13473`)
- :class:`RangeIndex` has gained :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop`, and :attr:`~RangeIndex.step` attributes (:issue:`25710`)
- :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2320,7 +2320,7 @@ def union(self, other, sort=None):
else:
rvals = other._values

if self.is_monotonic and other.is_monotonic:
if sort is None and self.is_monotonic and other.is_monotonic:
try:
result = self._outer_indexer(lvals, rvals)[0]
except TypeError:
Expand Down
16 changes: 12 additions & 4 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,23 +463,31 @@ def _extended_gcd(self, a, b):
old_t, t = t, old_t - quotient * t
return old_r, old_s, old_t

def union(self, other):
def union(self, other, sort=None):
"""
Form the union of two Index objects and sort if possible

Parameters
----------
other : Index or array-like

sort : False or None, default None
Whether to sort resulting index. ``sort=None`` returns a
monotonically increasing ``RangeIndex`` if possible or a sorted
``Int64Index`` if not. ``sort=False`` always returns an
unsorted ``Int64Index``

.. versionadded:: 0.25.0

Returns
-------
union : Index
"""
self._assert_can_do_setop(other)
if len(other) == 0 or self.equals(other) or len(self) == 0:
return super(RangeIndex, self).union(other)
return super(RangeIndex, self).union(other, sort=sort)

if isinstance(other, RangeIndex):
if isinstance(other, RangeIndex) and sort is None:
start_s, step_s = self._start, self._step
end_s = self._start + self._step * (len(self) - 1)
start_o, step_o = other._start, other._step
Expand Down Expand Up @@ -516,7 +524,7 @@ def union(self, other):
(end_s - step_o <= end_o)):
return RangeIndex(start_r, end_r + step_o, step_o)

return self._int64index.union(other)
return self._int64index.union(other, sort=sort)

@Appender(_index_shared_docs['join'])
def join(self, other, how='left', level=None, return_indexers=False,
Expand Down
12 changes: 10 additions & 2 deletions pandas/tests/indexes/datetimes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,11 @@ def test_union_bug_1730(self, sort):
rng_b = date_range('1/1/2012', periods=4, freq='4H')

result = rng_a.union(rng_b, sort=sort)
exp = DatetimeIndex(sorted(set(list(rng_a)) | set(list(rng_b))))
exp = list(rng_a) + list(rng_b[1:])
if sort is None:
exp = DatetimeIndex(sorted(exp))
else:
exp = DatetimeIndex(exp)
tm.assert_index_equal(result, exp)

@pytest.mark.parametrize("sort", [None, False])
Expand All @@ -112,7 +116,11 @@ def test_union_bug_4564(self, sort):
right = left + DateOffset(minutes=15)

result = left.union(right, sort=sort)
exp = DatetimeIndex(sorted(set(list(left)) | set(list(right))))
exp = list(left) + list(right)
if sort is None:
exp = DatetimeIndex(sorted(exp))
else:
exp = DatetimeIndex(exp)
tm.assert_index_equal(result, exp)

@pytest.mark.parametrize("sort", [None, False])
Expand Down
11 changes: 9 additions & 2 deletions pandas/tests/indexes/period/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,12 @@ def test_union(self, sort):
# union
other1 = pd.period_range('1/1/2000', freq='D', periods=5)
rng1 = pd.period_range('1/6/2000', freq='D', periods=5)
expected1 = pd.period_range('1/1/2000', freq='D', periods=10)
expected1 = pd.PeriodIndex(['2000-01-06', '2000-01-07',
'2000-01-08', '2000-01-09',
'2000-01-10', '2000-01-01',
'2000-01-02', '2000-01-03',
'2000-01-04', '2000-01-05'],
freq='D')

rng2 = pd.period_range('1/1/2000', freq='D', periods=5)
other2 = pd.period_range('1/4/2000', freq='D', periods=5)
Expand Down Expand Up @@ -77,7 +82,9 @@ def test_union(self, sort):

rng7 = pd.period_range('2003-01-01', freq='A', periods=5)
other7 = pd.period_range('1998-01-01', freq='A', periods=8)
expected7 = pd.period_range('1998-01-01', freq='A', periods=10)
expected7 = pd.PeriodIndex(['2003', '2004', '2005', '2006', '2007',
'1998', '1999', '2000', '2001', '2002'],
freq='A')

rng8 = pd.PeriodIndex(['1/3/2000', '1/2/2000', '1/1/2000',
'1/5/2000', '1/4/2000'], freq='D')
Expand Down
169 changes: 100 additions & 69 deletions pandas/tests/indexes/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@

from .test_numeric import Numeric

# aliases to make some tests easier to read
RI = RangeIndex
I64 = Int64Index
F64 = Float64Index
OI = Index


class TestRangeIndex(Numeric):
_holder = RangeIndex
Expand Down Expand Up @@ -565,51 +571,73 @@ def test_intersection(self, sort):
expected = RangeIndex(0, 0, 1)
tm.assert_index_equal(result, expected)

def test_union_noncomparable(self):
@pytest.mark.parametrize('sort', [False, None])
def test_union_noncomparable(self, sort):
from datetime import datetime, timedelta
# corner case, non-Int64Index
now = datetime.now()
other = Index([now + timedelta(i) for i in range(4)], dtype=object)
result = self.index.union(other)
result = self.index.union(other, sort=sort)
expected = Index(np.concatenate((self.index, other)))
tm.assert_index_equal(result, expected)

result = other.union(self.index)
result = other.union(self.index, sort=sort)
expected = Index(np.concatenate((other, self.index)))
tm.assert_index_equal(result, expected)

def test_union(self):
RI = RangeIndex
I64 = Int64Index
cases = [(RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1)),
(RI(0, 10, 1), RI(5, 20, 1), RI(0, 20, 1)),
(RI(0, 10, 1), RI(10, 20, 1), RI(0, 20, 1)),
(RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1)),
(RI(0, -10, -1), RI(-10, -20, -1), RI(-19, 1, 1)),
(RI(0, 10, 2), RI(1, 10, 2), RI(0, 10, 1)),
(RI(0, 11, 2), RI(1, 12, 2), RI(0, 12, 1)),
(RI(0, 21, 4), RI(-2, 24, 4), RI(-2, 24, 2)),
(RI(0, -20, -2), RI(-1, -21, -2), RI(-19, 1, 1)),
(RI(0, 100, 5), RI(0, 100, 20), RI(0, 100, 5)),
(RI(0, -100, -5), RI(5, -100, -20), RI(-95, 10, 5)),
(RI(0, -11, -1), RI(1, -12, -4), RI(-11, 2, 1)),
(RI(0), RI(0), RI(0)),
(RI(0, -10, -2), RI(0), RI(0, -10, -2)),
(RI(0, 100, 2), RI(100, 150, 200), RI(0, 102, 2)),
(RI(0, -100, -2), RI(-100, 50, 102), RI(-100, 4, 2)),
(RI(0, -100, -1), RI(0, -50, -3), RI(-99, 1, 1)),
(RI(0, 1, 1), RI(5, 6, 10), RI(0, 6, 5)),
(RI(0, 10, 5), RI(-5, -6, -20), RI(-5, 10, 5)),
(RI(0, 3, 1), RI(4, 5, 1), I64([0, 1, 2, 4])),
(RI(0, 10, 1), I64([]), RI(0, 10, 1)),
(RI(0), I64([1, 5, 6]), I64([1, 5, 6]))]
for idx1, idx2, expected in cases:
res1 = idx1.union(idx2)
res2 = idx2.union(idx1)
res3 = idx1._int64index.union(idx2)
tm.assert_index_equal(res1, expected, exact=True)
tm.assert_index_equal(res2, expected, exact=True)
tm.assert_index_equal(res3, expected)
@pytest.fixture(params=[
(RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1)),
(RI(0, 10, 1), RI(5, 20, 1), RI(0, 20, 1), I64(range(20))),
(RI(0, 10, 1), RI(10, 20, 1), RI(0, 20, 1), I64(range(20))),
(RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1)),
(RI(0, -10, -1), RI(-10, -20, -1), RI(-19, 1, 1),
I64(range(0, -20, -1))),
(RI(0, 10, 2), RI(1, 10, 2), RI(0, 10, 1),
I64(list(range(0, 10, 2)) + list(range(1, 10, 2)))),
(RI(0, 11, 2), RI(1, 12, 2), RI(0, 12, 1),
I64(list(range(0, 11, 2)) + list(range(1, 12, 2)))),
(RI(0, 21, 4), RI(-2, 24, 4), RI(-2, 24, 2),
I64(list(range(0, 21, 4)) + list(range(-2, 24, 4)))),
(RI(0, -20, -2), RI(-1, -21, -2), RI(-19, 1, 1),
I64(list(range(0, -20, -2)) + list(range(-1, -21, -2)))),
(RI(0, 100, 5), RI(0, 100, 20), RI(0, 100, 5), I64(range(0, 100, 5))),
(RI(0, -100, -5), RI(5, -100, -20), RI(-95, 10, 5),
I64(list(range(0, -100, -5)) + [5])),
(RI(0, -11, -1), RI(1, -12, -4), RI(-11, 2, 1),
I64(list(range(0, -11, -1)) + [1, -11])),
(RI(0), RI(0), RI(0), RI(0)),
(RI(0, -10, -2), RI(0), RI(0, -10, -2), RI(0, -10, -2)),
(RI(0, 100, 2), RI(100, 150, 200), RI(0, 102, 2),
I64(range(0, 102, 2))),
(RI(0, -100, -2), RI(-100, 50, 102), RI(-100, 4, 2),
I64(list(range(0, -100, -2)) + [-100, 2])),
(RI(0, -100, -1), RI(0, -50, -3), RI(-99, 1, 1),
I64(list(range(0, -100, -1)))),
(RI(0, 1, 1), RI(5, 6, 10), RI(0, 6, 5), I64([0, 5])),
(RI(0, 10, 5), RI(-5, -6, -20), RI(-5, 10, 5), I64([0, 5, -5])),
(RI(0, 3, 1), RI(4, 5, 1), I64([0, 1, 2, 4]), I64([0, 1, 2, 4])),
(RI(0, 10, 1), I64([]), RI(0, 10, 1), RI(0, 10, 1)),
(RI(0), I64([1, 5, 6]), I64([1, 5, 6]), I64([1, 5, 6]))
])
def unions(self, request):
"""Inputs and expected outputs for RangeIndex.union tests"""

return request.param

def test_union_sorted(self, unions):

idx1, idx2, expected_sorted, expected_notsorted = unions

res1 = idx1.union(idx2, sort=None)
tm.assert_index_equal(res1, expected_sorted, exact=True)

res1 = idx1.union(idx2, sort=False)
tm.assert_index_equal(res1, expected_notsorted, exact=True)

res2 = idx2.union(idx1, sort=None)
res3 = idx1._int64index.union(idx2, sort=None)
tm.assert_index_equal(res2, expected_sorted, exact=True)
tm.assert_index_equal(res3, expected_sorted)

def test_nbytes(self):

Expand Down Expand Up @@ -840,38 +868,41 @@ def test_len_specialised(self):
i = RangeIndex(0, 5, step)
assert len(i) == 0

def test_append(self):
@pytest.fixture(params=[
([RI(1, 12, 5)], RI(1, 12, 5)),
([RI(0, 6, 4)], RI(0, 6, 4)),
([RI(1, 3), RI(3, 7)], RI(1, 7)),
([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)),
([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)),
([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)),
([RI(-4, -8), RI(-8, -12)], RI(0, 0)),
([RI(-4, -8), RI(3, -4)], RI(0, 0)),
([RI(-4, -8), RI(3, 5)], RI(3, 5)),
([RI(-4, -2), RI(3, 5)], I64([-4, -3, 3, 4])),
([RI(-2,), RI(3, 5)], RI(3, 5)),
([RI(2,), RI(2)], I64([0, 1, 0, 1])),
([RI(2,), RI(2, 5), RI(5, 8, 4)], RI(0, 6)),
([RI(2,), RI(3, 5), RI(5, 8, 4)], I64([0, 1, 3, 4, 5])),
([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)),
([RI(3,), I64([-1, 3, 15])], I64([0, 1, 2, -1, 3, 15])),
([RI(3,), F64([-1, 3.1, 15.])], F64([0, 1, 2, -1, 3.1, 15.])),
([RI(3,), OI(['a', None, 14])], OI([0, 1, 2, 'a', None, 14])),
([RI(3, 1), OI(['a', None, 14])], OI(['a', None, 14]))
])
def appends(self, request):
"""Inputs and expected outputs for RangeIndex.append test"""

return request.param

def test_append(self, appends):
# GH16212
RI = RangeIndex
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you parametrize this one as well (similar to above)?

I64 = Int64Index
F64 = Float64Index
OI = Index
cases = [([RI(1, 12, 5)], RI(1, 12, 5)),
([RI(0, 6, 4)], RI(0, 6, 4)),
([RI(1, 3), RI(3, 7)], RI(1, 7)),
([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)),
([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)),
([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)),
([RI(-4, -8), RI(-8, -12)], RI(0, 0)),
([RI(-4, -8), RI(3, -4)], RI(0, 0)),
([RI(-4, -8), RI(3, 5)], RI(3, 5)),
([RI(-4, -2), RI(3, 5)], I64([-4, -3, 3, 4])),
([RI(-2,), RI(3, 5)], RI(3, 5)),
([RI(2,), RI(2)], I64([0, 1, 0, 1])),
([RI(2,), RI(2, 5), RI(5, 8, 4)], RI(0, 6)),
([RI(2,), RI(3, 5), RI(5, 8, 4)], I64([0, 1, 3, 4, 5])),
([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)),
([RI(3,), I64([-1, 3, 15])], I64([0, 1, 2, -1, 3, 15])),
([RI(3,), F64([-1, 3.1, 15.])], F64([0, 1, 2, -1, 3.1, 15.])),
([RI(3,), OI(['a', None, 14])], OI([0, 1, 2, 'a', None, 14])),
([RI(3, 1), OI(['a', None, 14])], OI(['a', None, 14]))
]

for indices, expected in cases:
result = indices[0].append(indices[1:])
tm.assert_index_equal(result, expected, exact=True)

if len(indices) == 2:
# Append single item rather than list
result2 = indices[0].append(indices[1])
tm.assert_index_equal(result2, expected, exact=True)

indices, expected = appends

result = indices[0].append(indices[1:])
tm.assert_index_equal(result, expected, exact=True)

if len(indices) == 2:
# Append single item rather than list
result2 = indices[0].append(indices[1])
tm.assert_index_equal(result2, expected, exact=True)