Skip to content

Commit 17d2ec8

Browse files
committed
ENH: Add sort parameter to RangeIndex.union (pandas-dev#24471)
1 parent 0f5a7e3 commit 17d2ec8

File tree

6 files changed

+112
-32
lines changed

6 files changed

+112
-32
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ Other Enhancements
3131
- ``Series.str`` has gained :meth:`Series.str.casefold` method to remove all case distinctions present in a string (:issue:`25405`)
3232
- :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`)
3333
- :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`)
34+
- :meth:`RangeIndex.union` now supports the ``sort`` argument. If ``sort=False`` an unsorted ``Int64Index`` is always returned. ``sort=None`` is the default and returns a monotonically increasing ``RangeIndex`` if possible or a sorted ``Int64Index`` if not (:issue:`24471`)
3435
- :meth:`DataFrame.rename` now supports the ``errors`` argument to raise errors when attempting to rename nonexistent keys (:issue:`13473`)
3536
- :class:`RangeIndex` has gained :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop`, and :attr:`~RangeIndex.step` attributes (:issue:`25710`)
3637
- :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`)

pandas/core/indexes/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2320,7 +2320,7 @@ def union(self, other, sort=None):
23202320
else:
23212321
rvals = other._values
23222322

2323-
if self.is_monotonic and other.is_monotonic:
2323+
if self.is_monotonic and other.is_monotonic and sort is None:
23242324
try:
23252325
result = self._outer_indexer(lvals, rvals)[0]
23262326
except TypeError:

pandas/core/indexes/range.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -463,23 +463,31 @@ def _extended_gcd(self, a, b):
463463
old_t, t = t, old_t - quotient * t
464464
return old_r, old_s, old_t
465465

466-
def union(self, other):
466+
def union(self, other, sort=None):
467467
"""
468468
Form the union of two Index objects and sort if possible
469469
470470
Parameters
471471
----------
472472
other : Index or array-like
473473
474+
sort : False or None, default None
475+
Whether to sort resulting index. ``sort=None`` returns a
476+
monotonically increasing ``RangeIndex`` if possible or a sorted
477+
``Int64Index`` if not. ``sort=False`` always returns an
478+
unsorted ``Int64Index``
479+
480+
.. versionadded:: 0.25.0
481+
474482
Returns
475483
-------
476484
union : Index
477485
"""
478486
self._assert_can_do_setop(other)
479487
if len(other) == 0 or self.equals(other) or len(self) == 0:
480-
return super(RangeIndex, self).union(other)
488+
return super(RangeIndex, self).union(other, sort=sort)
481489

482-
if isinstance(other, RangeIndex):
490+
if isinstance(other, RangeIndex) and sort is None:
483491
start_s, step_s = self._start, self._step
484492
end_s = self._start + self._step * (len(self) - 1)
485493
start_o, step_o = other._start, other._step
@@ -516,7 +524,7 @@ def union(self, other):
516524
(end_s - step_o <= end_o)):
517525
return RangeIndex(start_r, end_r + step_o, step_o)
518526

519-
return self._int64index.union(other)
527+
return self._int64index.union(other, sort=sort)
520528

521529
@Appender(_index_shared_docs['join'])
522530
def join(self, other, how='left', level=None, return_indexers=False,

pandas/tests/indexes/datetimes/test_setops.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,11 @@ def test_union_bug_1730(self, sort):
8686
rng_b = date_range('1/1/2012', periods=4, freq='4H')
8787

8888
result = rng_a.union(rng_b, sort=sort)
89-
exp = DatetimeIndex(sorted(set(list(rng_a)) | set(list(rng_b))))
89+
exp = list(rng_a) + list(rng_b[1:])
90+
if sort is None:
91+
exp = DatetimeIndex(sorted(exp))
92+
else:
93+
exp = DatetimeIndex(exp)
9094
tm.assert_index_equal(result, exp)
9195

9296
@pytest.mark.parametrize("sort", [None, False])
@@ -112,7 +116,11 @@ def test_union_bug_4564(self, sort):
112116
right = left + DateOffset(minutes=15)
113117

114118
result = left.union(right, sort=sort)
115-
exp = DatetimeIndex(sorted(set(list(left)) | set(list(right))))
119+
exp = list(left) + list(right)
120+
if sort is None:
121+
exp = DatetimeIndex(sorted(exp))
122+
else:
123+
exp = DatetimeIndex(exp)
116124
tm.assert_index_equal(result, exp)
117125

118126
@pytest.mark.parametrize("sort", [None, False])

pandas/tests/indexes/period/test_setops.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,12 @@ def test_union(self, sort):
4343
# union
4444
other1 = pd.period_range('1/1/2000', freq='D', periods=5)
4545
rng1 = pd.period_range('1/6/2000', freq='D', periods=5)
46-
expected1 = pd.period_range('1/1/2000', freq='D', periods=10)
46+
expected1 = pd.PeriodIndex(['2000-01-06', '2000-01-07',
47+
'2000-01-08', '2000-01-09',
48+
'2000-01-10', '2000-01-01',
49+
'2000-01-02', '2000-01-03',
50+
'2000-01-04', '2000-01-05'],
51+
freq='D')
4752

4853
rng2 = pd.period_range('1/1/2000', freq='D', periods=5)
4954
other2 = pd.period_range('1/4/2000', freq='D', periods=5)
@@ -77,7 +82,9 @@ def test_union(self, sort):
7782

7883
rng7 = pd.period_range('2003-01-01', freq='A', periods=5)
7984
other7 = pd.period_range('1998-01-01', freq='A', periods=8)
80-
expected7 = pd.period_range('1998-01-01', freq='A', periods=10)
85+
expected7 = pd.PeriodIndex(['2003', '2004', '2005', '2006', '2007',
86+
'1998', '1999', '2000', '2001', '2002'],
87+
freq='A')
8188

8289
rng8 = pd.PeriodIndex(['1/3/2000', '1/2/2000', '1/1/2000',
8390
'1/5/2000', '1/4/2000'], freq='D')

pandas/tests/indexes/test_range.py

+79-23
Original file line numberDiff line numberDiff line change
@@ -581,36 +581,92 @@ def test_union_noncomparable(self):
581581
def test_union(self):
582582
RI = RangeIndex
583583
I64 = Int64Index
584-
cases = [(RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1)),
585-
(RI(0, 10, 1), RI(5, 20, 1), RI(0, 20, 1)),
586-
(RI(0, 10, 1), RI(10, 20, 1), RI(0, 20, 1)),
587-
(RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1)),
588-
(RI(0, -10, -1), RI(-10, -20, -1), RI(-19, 1, 1)),
589-
(RI(0, 10, 2), RI(1, 10, 2), RI(0, 10, 1)),
590-
(RI(0, 11, 2), RI(1, 12, 2), RI(0, 12, 1)),
591-
(RI(0, 21, 4), RI(-2, 24, 4), RI(-2, 24, 2)),
592-
(RI(0, -20, -2), RI(-1, -21, -2), RI(-19, 1, 1)),
593-
(RI(0, 100, 5), RI(0, 100, 20), RI(0, 100, 5)),
594-
(RI(0, -100, -5), RI(5, -100, -20), RI(-95, 10, 5)),
595-
(RI(0, -11, -1), RI(1, -12, -4), RI(-11, 2, 1)),
596-
(RI(0), RI(0), RI(0)),
597-
(RI(0, -10, -2), RI(0), RI(0, -10, -2)),
598-
(RI(0, 100, 2), RI(100, 150, 200), RI(0, 102, 2)),
599-
(RI(0, -100, -2), RI(-100, 50, 102), RI(-100, 4, 2)),
600-
(RI(0, -100, -1), RI(0, -50, -3), RI(-99, 1, 1)),
601-
(RI(0, 1, 1), RI(5, 6, 10), RI(0, 6, 5)),
602-
(RI(0, 10, 5), RI(-5, -6, -20), RI(-5, 10, 5)),
603-
(RI(0, 3, 1), RI(4, 5, 1), I64([0, 1, 2, 4])),
604-
(RI(0, 10, 1), I64([]), RI(0, 10, 1)),
605-
(RI(0), I64([1, 5, 6]), I64([1, 5, 6]))]
606-
for idx1, idx2, expected in cases:
584+
585+
inputs = [(RI(0, 10, 1), RI(0, 10, 1)),
586+
(RI(0, 10, 1), RI(5, 20, 1)),
587+
(RI(0, 10, 1), RI(10, 20, 1)),
588+
(RI(0, -10, -1), RI(0, -10, -1)),
589+
(RI(0, -10, -1), RI(-10, -20, -1)),
590+
(RI(0, 10, 2), RI(1, 10, 2)),
591+
(RI(0, 11, 2), RI(1, 12, 2)),
592+
(RI(0, 21, 4), RI(-2, 24, 4)),
593+
(RI(0, -20, -2), RI(-1, -21, -2)),
594+
(RI(0, 100, 5), RI(0, 100, 20)),
595+
(RI(0, -100, -5), RI(5, -100, -20)),
596+
(RI(0, -11, -1), RI(1, -12, -4)),
597+
(RI(0), RI(0)),
598+
(RI(0, -10, -2), RI(0)),
599+
(RI(0, 100, 2), RI(100, 150, 200)),
600+
(RI(0, -100, -2), RI(-100, 50, 102)),
601+
(RI(0, -100, -1), RI(0, -50, -3)),
602+
(RI(0, 1, 1), RI(5, 6, 10)),
603+
(RI(0, 10, 5), RI(-5, -6, -20)),
604+
(RI(0, 3, 1), RI(4, 5, 1)),
605+
(RI(0, 10, 1), I64([])),
606+
(RI(0), I64([1, 5, 6]))]
607+
608+
expected_sorted = [RI(0, 10, 1),
609+
RI(0, 20, 1),
610+
RI(0, 20, 1),
611+
RI(0, -10, -1),
612+
RI(-19, 1, 1),
613+
RI(0, 10, 1),
614+
RI(0, 12, 1),
615+
RI(-2, 24, 2),
616+
RI(-19, 1, 1),
617+
RI(0, 100, 5),
618+
RI(-95, 10, 5),
619+
RI(-11, 2, 1),
620+
RI(0),
621+
RI(0, -10, -2),
622+
RI(0, 102, 2),
623+
RI(-100, 4, 2),
624+
RI(-99, 1, 1),
625+
RI(0, 6, 5),
626+
RI(-5, 10, 5),
627+
I64([0, 1, 2, 4]),
628+
RI(0, 10, 1),
629+
I64([1, 5, 6])]
630+
631+
for ((idx1, idx2), expected) in zip(inputs, expected_sorted):
607632
res1 = idx1.union(idx2)
608633
res2 = idx2.union(idx1)
609634
res3 = idx1._int64index.union(idx2)
610635
tm.assert_index_equal(res1, expected, exact=True)
611636
tm.assert_index_equal(res2, expected, exact=True)
612637
tm.assert_index_equal(res3, expected)
613638

639+
expected_notsorted = [RI(0, 10, 1),
640+
I64(range(20)),
641+
I64(range(20)),
642+
RI(0, -10, -1),
643+
I64(range(0, -20, -1)),
644+
I64(list(range(0, 10, 2)) +
645+
list(range(1, 10, 2))),
646+
I64(list(range(0, 11, 2)) +
647+
list(range(1, 12, 2))),
648+
I64(list(range(0, 21, 4)) +
649+
list(range(-2, 24, 4))),
650+
I64(list(range(0, -20, -2)) +
651+
list(range(-1, -21, -2))),
652+
I64(range(0, 100, 5)),
653+
I64(list(range(0, -100, -5)) + [5]),
654+
I64(list(range(0, -11, -1)) + [1, -11]),
655+
RI(0),
656+
RI(0, -10, -2),
657+
I64(range(0, 102, 2)),
658+
I64(list(range(0, -100, -2)) + [-100, 2]),
659+
I64(list(range(0, -100, -1))),
660+
I64([0, 5]),
661+
I64([0, 5, -5]),
662+
I64([0, 1, 2, 4]),
663+
RI(0, 10, 1),
664+
I64([1, 5, 6])]
665+
666+
for ((idx1, idx2), expected) in zip(inputs, expected_notsorted):
667+
res1 = idx1.union(idx2, sort=False)
668+
tm.assert_index_equal(res1, expected, exact=True)
669+
614670
def test_nbytes(self):
615671

616672
# memory savings vs int index

0 commit comments

Comments
 (0)