ENH: Add sort parameter to RangeIndex.union (pandas-dev#24471) (pandas-dev#25788)

reidy-p · jreback · commit af6ccf64dab5 · 2019-03-26T16:05:56.000-04:00
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -31,6 +31,7 @@ Other Enhancements
 - ``Series.str`` has gained :meth:`Series.str.casefold` method to removes all case distinctions present in a string (:issue:`25405`)
 - :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`)
 - :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`)
+- :meth:`RangeIndex.union` now supports the ``sort`` argument. If ``sort=False`` an unsorted ``Int64Index`` is always returned. ``sort=None`` is the default and returns a monotonically increasing ``RangeIndex`` if possible or a sorted ``Int64Index`` if not (:issue:`24471`)
 - :meth:`DataFrame.rename` now supports the ``errors`` argument to raise errors when attempting to rename nonexistent keys (:issue:`13473`)
 - :class:`RangeIndex` has gained :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop`, and :attr:`~RangeIndex.step` attributes (:issue:`25710`)
 - :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`)
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -2320,7 +2320,7 @@ def union(self, other, sort=None):
         else:
             rvals = other._values
 
-        if self.is_monotonic and other.is_monotonic:
+        if sort is None and self.is_monotonic and other.is_monotonic:
             try:
                 result = self._outer_indexer(lvals, rvals)[0]
             except TypeError:
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
@@ -463,23 +463,31 @@ def _extended_gcd(self, a, b):
             old_t, t = t, old_t - quotient * t
         return old_r, old_s, old_t
 
-    def union(self, other):
+    def union(self, other, sort=None):
         """
         Form the union of two Index objects and sorts if possible
 
         Parameters
         ----------
         other : Index or array-like
 
+        sort : False or None, default None
+            Whether to sort resulting index. ``sort=None`` returns a
+            monotonically increasing ``RangeIndex`` if possible or a sorted
+            ``Int64Index`` if not. ``sort=False`` always returns an
+            unsorted ``Int64Index``.
+
+            .. versionadded:: 0.25.0
+
         Returns
         -------
         union : Index
         """
         self._assert_can_do_setop(other)
         if len(other) == 0 or self.equals(other) or len(self) == 0:
-            return super(RangeIndex, self).union(other)
+            return super(RangeIndex, self).union(other, sort=sort)
 
-        if isinstance(other, RangeIndex):
+        if isinstance(other, RangeIndex) and sort is None:
             start_s, step_s = self._start, self._step
             end_s = self._start + self._step * (len(self) - 1)
             start_o, step_o = other._start, other._step
@@ -516,7 +524,7 @@ def union(self, other):
                         (end_s - step_o <= end_o)):
                     return RangeIndex(start_r, end_r + step_o, step_o)
 
-        return self._int64index.union(other)
+        return self._int64index.union(other, sort=sort)
 
     @Appender(_index_shared_docs['join'])
     def join(self, other, how='left', level=None, return_indexers=False,
diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py
@@ -86,7 +86,11 @@ def test_union_bug_1730(self, sort):
         rng_b = date_range('1/1/2012', periods=4, freq='4H')
 
         result = rng_a.union(rng_b, sort=sort)
-        exp = DatetimeIndex(sorted(set(list(rng_a)) | set(list(rng_b))))
+        exp = list(rng_a) + list(rng_b[1:])
+        if sort is None:
+            exp = DatetimeIndex(sorted(exp))
+        else:
+            exp = DatetimeIndex(exp)
         tm.assert_index_equal(result, exp)
 
     @pytest.mark.parametrize("sort", [None, False])
@@ -112,7 +116,11 @@ def test_union_bug_4564(self, sort):
         right = left + DateOffset(minutes=15)
 
         result = left.union(right, sort=sort)
-        exp = DatetimeIndex(sorted(set(list(left)) | set(list(right))))
+        exp = list(left) + list(right)
+        if sort is None:
+            exp = DatetimeIndex(sorted(exp))
+        else:
+            exp = DatetimeIndex(exp)
         tm.assert_index_equal(result, exp)
 
     @pytest.mark.parametrize("sort", [None, False])
diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py
@@ -43,7 +43,12 @@ def test_union(self, sort):
         # union
         other1 = pd.period_range('1/1/2000', freq='D', periods=5)
         rng1 = pd.period_range('1/6/2000', freq='D', periods=5)
-        expected1 = pd.period_range('1/1/2000', freq='D', periods=10)
+        expected1 = pd.PeriodIndex(['2000-01-06', '2000-01-07',
+                                    '2000-01-08', '2000-01-09',
+                                    '2000-01-10', '2000-01-01',
+                                    '2000-01-02', '2000-01-03',
+                                    '2000-01-04', '2000-01-05'],
+                                   freq='D')
 
         rng2 = pd.period_range('1/1/2000', freq='D', periods=5)
         other2 = pd.period_range('1/4/2000', freq='D', periods=5)
@@ -77,7 +82,9 @@ def test_union(self, sort):
 
         rng7 = pd.period_range('2003-01-01', freq='A', periods=5)
         other7 = pd.period_range('1998-01-01', freq='A', periods=8)
-        expected7 = pd.period_range('1998-01-01', freq='A', periods=10)
+        expected7 = pd.PeriodIndex(['2003', '2004', '2005', '2006', '2007',
+                                    '1998', '1999', '2000', '2001', '2002'],
+                                   freq='A')
 
         rng8 = pd.PeriodIndex(['1/3/2000', '1/2/2000', '1/1/2000',
                                '1/5/2000', '1/4/2000'], freq='D')
diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py
@@ -11,6 +11,12 @@
 
 from .test_numeric import Numeric
 
+# aliases to make some tests easier to read
+RI = RangeIndex
+I64 = Int64Index
+F64 = Float64Index
+OI = Index
+
 
 class TestRangeIndex(Numeric):
     _holder = RangeIndex
@@ -565,51 +571,73 @@ def test_intersection(self, sort):
         expected = RangeIndex(0, 0, 1)
         tm.assert_index_equal(result, expected)
 
-    def test_union_noncomparable(self):
+    @pytest.mark.parametrize('sort', [False, None])
+    def test_union_noncomparable(self, sort):
         from datetime import datetime, timedelta
         # corner case, non-Int64Index
         now = datetime.now()
         other = Index([now + timedelta(i) for i in range(4)], dtype=object)
-        result = self.index.union(other)
+        result = self.index.union(other, sort=sort)
         expected = Index(np.concatenate((self.index, other)))
         tm.assert_index_equal(result, expected)
 
-        result = other.union(self.index)
+        result = other.union(self.index, sort=sort)
         expected = Index(np.concatenate((other, self.index)))
         tm.assert_index_equal(result, expected)
 
-    def test_union(self):
-        RI = RangeIndex
-        I64 = Int64Index
-        cases = [(RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1)),
-                 (RI(0, 10, 1), RI(5, 20, 1), RI(0, 20, 1)),
-                 (RI(0, 10, 1), RI(10, 20, 1), RI(0, 20, 1)),
-                 (RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1)),
-                 (RI(0, -10, -1), RI(-10, -20, -1), RI(-19, 1, 1)),
-                 (RI(0, 10, 2), RI(1, 10, 2), RI(0, 10, 1)),
-                 (RI(0, 11, 2), RI(1, 12, 2), RI(0, 12, 1)),
-                 (RI(0, 21, 4), RI(-2, 24, 4), RI(-2, 24, 2)),
-                 (RI(0, -20, -2), RI(-1, -21, -2), RI(-19, 1, 1)),
-                 (RI(0, 100, 5), RI(0, 100, 20), RI(0, 100, 5)),
-                 (RI(0, -100, -5), RI(5, -100, -20), RI(-95, 10, 5)),
-                 (RI(0, -11, -1), RI(1, -12, -4), RI(-11, 2, 1)),
-                 (RI(0), RI(0), RI(0)),
-                 (RI(0, -10, -2), RI(0), RI(0, -10, -2)),
-                 (RI(0, 100, 2), RI(100, 150, 200), RI(0, 102, 2)),
-                 (RI(0, -100, -2), RI(-100, 50, 102), RI(-100, 4, 2)),
-                 (RI(0, -100, -1), RI(0, -50, -3), RI(-99, 1, 1)),
-                 (RI(0, 1, 1), RI(5, 6, 10), RI(0, 6, 5)),
-                 (RI(0, 10, 5), RI(-5, -6, -20), RI(-5, 10, 5)),
-                 (RI(0, 3, 1), RI(4, 5, 1), I64([0, 1, 2, 4])),
-                 (RI(0, 10, 1), I64([]), RI(0, 10, 1)),
-                 (RI(0), I64([1, 5, 6]), I64([1, 5, 6]))]
-        for idx1, idx2, expected in cases:
-            res1 = idx1.union(idx2)
-            res2 = idx2.union(idx1)
-            res3 = idx1._int64index.union(idx2)
-            tm.assert_index_equal(res1, expected, exact=True)
-            tm.assert_index_equal(res2, expected, exact=True)
-            tm.assert_index_equal(res3, expected)
+    @pytest.fixture(params=[
+        (RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1)),
+        (RI(0, 10, 1), RI(5, 20, 1), RI(0, 20, 1), I64(range(20))),
+        (RI(0, 10, 1), RI(10, 20, 1), RI(0, 20, 1), I64(range(20))),
+        (RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1)),
+        (RI(0, -10, -1), RI(-10, -20, -1), RI(-19, 1, 1),
+         I64(range(0, -20, -1))),
+        (RI(0, 10, 2), RI(1, 10, 2), RI(0, 10, 1),
+         I64(list(range(0, 10, 2)) + list(range(1, 10, 2)))),
+        (RI(0, 11, 2), RI(1, 12, 2), RI(0, 12, 1),
+         I64(list(range(0, 11, 2)) + list(range(1, 12, 2)))),
+        (RI(0, 21, 4), RI(-2, 24, 4), RI(-2, 24, 2),
+         I64(list(range(0, 21, 4)) + list(range(-2, 24, 4)))),
+        (RI(0, -20, -2), RI(-1, -21, -2), RI(-19, 1, 1),
+         I64(list(range(0, -20, -2)) + list(range(-1, -21, -2)))),
+        (RI(0, 100, 5), RI(0, 100, 20), RI(0, 100, 5), I64(range(0, 100, 5))),
+        (RI(0, -100, -5), RI(5, -100, -20), RI(-95, 10, 5),
+         I64(list(range(0, -100, -5)) + [5])),
+        (RI(0, -11, -1), RI(1, -12, -4), RI(-11, 2, 1),
+         I64(list(range(0, -11, -1)) + [1, -11])),
+        (RI(0), RI(0), RI(0), RI(0)),
+        (RI(0, -10, -2), RI(0), RI(0, -10, -2), RI(0, -10, -2)),
+        (RI(0, 100, 2), RI(100, 150, 200), RI(0, 102, 2),
+         I64(range(0, 102, 2))),
+        (RI(0, -100, -2), RI(-100, 50, 102), RI(-100, 4, 2),
+         I64(list(range(0, -100, -2)) + [-100, 2])),
+        (RI(0, -100, -1), RI(0, -50, -3), RI(-99, 1, 1),
+         I64(list(range(0, -100, -1)))),
+        (RI(0, 1, 1), RI(5, 6, 10), RI(0, 6, 5), I64([0, 5])),
+        (RI(0, 10, 5), RI(-5, -6, -20), RI(-5, 10, 5), I64([0, 5, -5])),
+        (RI(0, 3, 1), RI(4, 5, 1), I64([0, 1, 2, 4]), I64([0, 1, 2, 4])),
+        (RI(0, 10, 1), I64([]), RI(0, 10, 1), RI(0, 10, 1)),
+        (RI(0), I64([1, 5, 6]), I64([1, 5, 6]), I64([1, 5, 6]))
+    ])
+    def unions(self, request):
+        """Inputs and expected outputs for RangeIndex.union tests"""
+
+        return request.param
+
+    def test_union_sorted(self, unions):
+
+        idx1, idx2, expected_sorted, expected_notsorted = unions
+
+        res1 = idx1.union(idx2, sort=None)
+        tm.assert_index_equal(res1, expected_sorted, exact=True)
+
+        res1 = idx1.union(idx2, sort=False)
+        tm.assert_index_equal(res1, expected_notsorted, exact=True)
+
+        res2 = idx2.union(idx1, sort=None)
+        res3 = idx1._int64index.union(idx2, sort=None)
+        tm.assert_index_equal(res2, expected_sorted, exact=True)
+        tm.assert_index_equal(res3, expected_sorted)
 
     def test_nbytes(self):
 
@@ -840,38 +868,41 @@ def test_len_specialised(self):
             i = RangeIndex(0, 5, step)
             assert len(i) == 0
 
-    def test_append(self):
+    @pytest.fixture(params=[
+        ([RI(1, 12, 5)], RI(1, 12, 5)),
+        ([RI(0, 6, 4)], RI(0, 6, 4)),
+        ([RI(1, 3), RI(3, 7)], RI(1, 7)),
+        ([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)),
+        ([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)),
+        ([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)),
+        ([RI(-4, -8), RI(-8, -12)], RI(0, 0)),
+        ([RI(-4, -8), RI(3, -4)], RI(0, 0)),
+        ([RI(-4, -8), RI(3, 5)], RI(3, 5)),
+        ([RI(-4, -2), RI(3, 5)], I64([-4, -3, 3, 4])),
+        ([RI(-2,), RI(3, 5)], RI(3, 5)),
+        ([RI(2,), RI(2)], I64([0, 1, 0, 1])),
+        ([RI(2,), RI(2, 5), RI(5, 8, 4)], RI(0, 6)),
+        ([RI(2,), RI(3, 5), RI(5, 8, 4)], I64([0, 1, 3, 4, 5])),
+        ([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)),
+        ([RI(3,), I64([-1, 3, 15])], I64([0, 1, 2, -1, 3, 15])),
+        ([RI(3,), F64([-1, 3.1, 15.])], F64([0, 1, 2, -1, 3.1, 15.])),
+        ([RI(3,), OI(['a', None, 14])], OI([0, 1, 2, 'a', None, 14])),
+        ([RI(3, 1), OI(['a', None, 14])], OI(['a', None, 14]))
+    ])
+    def appends(self, request):
+        """Inputs and expected outputs for RangeIndex.append test"""
+
+        return request.param
+
+    def test_append(self, appends):
         # GH16212
-        RI = RangeIndex
-        I64 = Int64Index
-        F64 = Float64Index
-        OI = Index
-        cases = [([RI(1, 12, 5)], RI(1, 12, 5)),
-                 ([RI(0, 6, 4)], RI(0, 6, 4)),
-                 ([RI(1, 3), RI(3, 7)], RI(1, 7)),
-                 ([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)),
-                 ([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)),
-                 ([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)),
-                 ([RI(-4, -8), RI(-8, -12)], RI(0, 0)),
-                 ([RI(-4, -8), RI(3, -4)], RI(0, 0)),
-                 ([RI(-4, -8), RI(3, 5)], RI(3, 5)),
-                 ([RI(-4, -2), RI(3, 5)], I64([-4, -3, 3, 4])),
-                 ([RI(-2,), RI(3, 5)], RI(3, 5)),
-                 ([RI(2,), RI(2)], I64([0, 1, 0, 1])),
-                 ([RI(2,), RI(2, 5), RI(5, 8, 4)], RI(0, 6)),
-                 ([RI(2,), RI(3, 5), RI(5, 8, 4)], I64([0, 1, 3, 4, 5])),
-                 ([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)),
-                 ([RI(3,), I64([-1, 3, 15])], I64([0, 1, 2, -1, 3, 15])),
-                 ([RI(3,), F64([-1, 3.1, 15.])], F64([0, 1, 2, -1, 3.1, 15.])),
-                 ([RI(3,), OI(['a', None, 14])], OI([0, 1, 2, 'a', None, 14])),
-                 ([RI(3, 1), OI(['a', None, 14])], OI(['a', None, 14]))
-                 ]
-
-        for indices, expected in cases:
-            result = indices[0].append(indices[1:])
-            tm.assert_index_equal(result, expected, exact=True)
-
-            if len(indices) == 2:
-                # Append single item rather than list
-                result2 = indices[0].append(indices[1])
-                tm.assert_index_equal(result2, expected, exact=True)
+
+        indices, expected = appends
+
+        result = indices[0].append(indices[1:])
+        tm.assert_index_equal(result, expected, exact=True)
+
+        if len(indices) == 2:
+            # Append single item rather than list
+            result2 = indices[0].append(indices[1])
+            tm.assert_index_equal(result2, expected, exact=True)