Skip to content

Commit 9d9a595

Browse files
committed
ENH: Add in sort keyword to DatetimeIndex.union
1 parent 9de4cc1 commit 9d9a595

File tree

4 files changed

+109
-51
lines changed

4 files changed

+109
-51
lines changed

doc/source/styled.xlsx

5.55 KB
Binary file not shown.

doc/source/whatsnew/v0.25.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ Other Enhancements
2020
^^^^^^^^^^^^^^^^^^
2121

2222
- :meth:`Timestamp.replace` now supports the ``fold`` argument to disambiguate DST transition times (:issue:`25017`)
23-
-
23+
- :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behaviour of the ``sort`` parameter matches that of :meth:`Index.union` (:issue:`24994`)
2424
-
2525

2626
.. _whatsnew_0250.api_breaking:

pandas/core/indexes/datetimes.py

+30-6
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ def _formatter_func(self):
460460
# --------------------------------------------------------------------
461461
# Set Operation Methods
462462

463-
def union(self, other):
463+
def union(self, other, sort=None):
464464
"""
465465
Specialized union for DatetimeIndex objects. When combining
466466
overlapping ranges with the same DateOffset, this will be much
@@ -469,15 +469,27 @@ def union(self, other):
469469
Parameters
470470
----------
471471
other : DatetimeIndex or array-like
472+
sort : bool or None, default None
473+
Whether to sort the resulting Index.
474+
475+
* None : Sort the result, except when
476+
477+
1. `self` and `other` are equal.
478+
2. `self` or `other` has length 0.
479+
3. Some values in `self` or `other` cannot be compared.
480+
A RuntimeWarning is issued in this case.
481+
482+
* False : do not sort the result
472483
473484
Returns
474485
-------
475486
y : Index or DatetimeIndex
476487
"""
488+
self._validate_sort_keyword(sort)
477489
self._assert_can_do_setop(other)
478490

479491
if len(other) == 0 or self.equals(other) or len(self) == 0:
480-
return super(DatetimeIndex, self).union(other)
492+
return super(DatetimeIndex, self).union(other, sort=sort)
481493

482494
if not isinstance(other, DatetimeIndex):
483495
try:
@@ -488,9 +500,9 @@ def union(self, other):
488500
this, other = self._maybe_utc_convert(other)
489501

490502
if this._can_fast_union(other):
491-
return this._fast_union(other)
503+
return this._fast_union(other, sort=sort)
492504
else:
493-
result = Index.union(this, other)
505+
result = Index.union(this, other, sort=sort)
494506
if isinstance(result, DatetimeIndex):
495507
# TODO: we shouldn't be setting attributes like this;
496508
# in all the tests this equality already holds
@@ -563,16 +575,28 @@ def _can_fast_union(self, other):
563575
# this will raise
564576
return False
565577

566-
def _fast_union(self, other):
578+
def _fast_union(self, other, sort=None):
567579
if len(other) == 0:
568580
return self.view(type(self))
569581

570582
if len(self) == 0:
571583
return other.view(type(self))
572584

573-
# to make our life easier, "sort" the two ranges
585+
# Both DTIs are monotonic. Check if they are already
586+
# in the "correct" order
574587
if self[0] <= other[0]:
575588
left, right = self, other
589+
# DTIs are not in the "correct" order and we don't want
590+
# to sort but want to remove overlaps
591+
elif sort is not None:
592+
left, right = self, other
593+
left_start = left[0]
594+
loc = right.searchsorted(left_start, side='left')
595+
right_chunk = right.values[:loc]
596+
dates = _concat._concat_compat((left.values, right_chunk))
597+
return self._shallow_copy(dates)
598+
# DTIs are not in the "correct" order and we want
599+
# to sort
576600
else:
577601
left, right = other, self
578602

pandas/tests/indexes/datetimes/test_setops.py

+78-44
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,23 @@ class TestDatetimeIndexSetOps(object):
2121
'dateutil/US/Pacific']
2222

2323
# TODO: moved from test_datetimelike; dedup with version below
24-
def test_union2(self):
24+
@pytest.mark.parametrize("sort", [None, False])
25+
def test_union2(self, sort):
2526
everything = tm.makeDateIndex(10)
2627
first = everything[:5]
2728
second = everything[5:]
28-
union = first.union(second)
29-
assert tm.equalContents(union, everything)
29+
union = first.union(second, sort=sort)
30+
tm.assert_index_equal(union, everything)
3031

3132
# GH 10149
3233
cases = [klass(second.values) for klass in [np.array, Series, list]]
3334
for case in cases:
34-
result = first.union(case)
35-
assert tm.equalContents(result, everything)
35+
result = first.union(case, sort=sort)
36+
tm.assert_index_equal(result, everything)
3637

3738
@pytest.mark.parametrize("tz", tz)
38-
def test_union(self, tz):
39+
@pytest.mark.parametrize("sort", [None, False])
40+
def test_union(self, tz, sort):
3941
rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
4042
other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz)
4143
expected1 = pd.date_range('1/1/2000', freq='D', periods=10, tz=tz)
@@ -52,52 +54,68 @@ def test_union(self, tz):
5254
(rng2, other2, expected2),
5355
(rng3, other3, expected3)]:
5456

55-
result_union = rng.union(other)
57+
result_union = rng.union(other, sort=sort)
5658
tm.assert_index_equal(result_union, expected)
5759

58-
def test_union_coverage(self):
60+
result_union = other.union(rng, sort=sort)
61+
if sort is None:
62+
tm.assert_index_equal(result_union, expected)
63+
else:
64+
assert tm.equalContents(result_union, expected)
65+
66+
@pytest.mark.parametrize("sort", [None, False])
67+
def test_union_coverage(self, sort):
5968
idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02'])
6069
ordered = DatetimeIndex(idx.sort_values(), freq='infer')
61-
result = ordered.union(idx)
70+
result = ordered.union(idx, sort=sort)
6271
tm.assert_index_equal(result, ordered)
6372

64-
result = ordered[:0].union(ordered)
73+
result = ordered[:0].union(ordered, sort=sort)
6574
tm.assert_index_equal(result, ordered)
6675
assert result.freq == ordered.freq
6776

68-
def test_union_bug_1730(self):
77+
@pytest.mark.parametrize("sort", [None, False])
78+
def test_union_bug_1730(self, sort):
6979
rng_a = date_range('1/1/2012', periods=4, freq='3H')
7080
rng_b = date_range('1/1/2012', periods=4, freq='4H')
7181

72-
result = rng_a.union(rng_b)
82+
result = rng_a.union(rng_b, sort=sort)
7383
exp = DatetimeIndex(sorted(set(list(rng_a)) | set(list(rng_b))))
7484
tm.assert_index_equal(result, exp)
7585

76-
def test_union_bug_1745(self):
86+
@pytest.mark.parametrize("sort", [None, False])
87+
def test_union_bug_1745(self, sort):
7788
left = DatetimeIndex(['2012-05-11 15:19:49.695000'])
7889
right = DatetimeIndex(['2012-05-29 13:04:21.322000',
7990
'2012-05-11 15:27:24.873000',
8091
'2012-05-11 15:31:05.350000'])
8192

82-
result = left.union(right)
83-
exp = DatetimeIndex(sorted(set(list(left)) | set(list(right))))
93+
result = left.union(right, sort=sort)
94+
exp = DatetimeIndex(['2012-05-11 15:19:49.695000',
95+
'2012-05-29 13:04:21.322000',
96+
'2012-05-11 15:27:24.873000',
97+
'2012-05-11 15:31:05.350000'])
98+
if sort is None:
99+
exp = DatetimeIndex(sorted(set(list(left)) | set(list(right))))
84100
tm.assert_index_equal(result, exp)
85101

86-
def test_union_bug_4564(self):
102+
@pytest.mark.parametrize("sort", [None, False])
103+
def test_union_bug_4564(self, sort):
87104
from pandas import DateOffset
88105
left = date_range("2013-01-01", "2013-02-01")
89106
right = left + DateOffset(minutes=15)
90107

91-
result = left.union(right)
108+
result = left.union(right, sort=sort)
92109
exp = DatetimeIndex(sorted(set(list(left)) | set(list(right))))
93110
tm.assert_index_equal(result, exp)
94111

95-
def test_union_freq_both_none(self):
112+
@pytest.mark.parametrize("sort", [None, False])
113+
def test_union_freq_both_none(self, sort):
96114
# GH11086
97115
expected = bdate_range('20150101', periods=10)
98116
expected.freq = None
99117

100-
result = expected.union(expected)
118+
result = expected.union(expected, sort=sort)
101119
tm.assert_index_equal(result, expected)
102120
assert result.freq is None
103121

@@ -112,11 +130,14 @@ def test_union_dataframe_index(self):
112130
exp = pd.date_range('1/1/1980', '1/1/2012', freq='MS')
113131
tm.assert_index_equal(df.index, exp)
114132

115-
def test_union_with_DatetimeIndex(self):
133+
@pytest.mark.parametrize("sort", [None, False])
134+
def test_union_with_DatetimeIndex(self, sort):
116135
i1 = Int64Index(np.arange(0, 20, 2))
117136
i2 = date_range(start='2012-01-03 00:00:00', periods=10, freq='D')
118-
i1.union(i2) # Works
119-
i2.union(i1) # Fails with "AttributeError: can't set attribute"
137+
# Works
138+
i1.union(i2, sort=sort)
139+
# Fails with "AttributeError: can't set attribute"
140+
i2.union(i1, sort=sort)
120141

121142
# TODO: moved from test_datetimelike; de-duplicate with version below
122143
def test_intersection2(self):
@@ -262,11 +283,12 @@ def test_datetimeindex_diff(self, sort):
262283
periods=98)
263284
assert len(dti1.difference(dti2, sort)) == 2
264285

265-
def test_datetimeindex_union_join_empty(self):
286+
@pytest.mark.parametrize("sort", [None, False])
287+
def test_datetimeindex_union_join_empty(self, sort):
266288
dti = date_range(start='1/1/2001', end='2/1/2001', freq='D')
267289
empty = Index([])
268290

269-
result = dti.union(empty)
291+
result = dti.union(empty, sort=sort)
270292
assert isinstance(result, DatetimeIndex)
271293
assert result is result
272294

@@ -287,35 +309,39 @@ class TestBusinessDatetimeIndex(object):
287309
def setup_method(self, method):
288310
self.rng = bdate_range(START, END)
289311

290-
def test_union(self):
312+
@pytest.mark.parametrize("sort", [None, False])
313+
def test_union(self, sort):
291314
# overlapping
292315
left = self.rng[:10]
293316
right = self.rng[5:10]
294317

295-
the_union = left.union(right)
318+
the_union = left.union(right, sort=sort)
296319
assert isinstance(the_union, DatetimeIndex)
297320

298321
# non-overlapping, gap in middle
299322
left = self.rng[:5]
300323
right = self.rng[10:]
301324

302-
the_union = left.union(right)
325+
the_union = left.union(right, sort=sort)
303326
assert isinstance(the_union, Index)
304327

305328
# non-overlapping, no gap
306329
left = self.rng[:5]
307330
right = self.rng[5:10]
308331

309-
the_union = left.union(right)
332+
the_union = left.union(right, sort=sort)
310333
assert isinstance(the_union, DatetimeIndex)
311334

312335
# order does not matter
313-
tm.assert_index_equal(right.union(left), the_union)
336+
if sort is None:
337+
tm.assert_index_equal(right.union(left, sort=sort), the_union)
338+
else:
339+
assert tm.equalContents(right.union(left, sort=sort), the_union)
314340

315341
# overlapping, but different offset
316342
rng = date_range(START, END, freq=BMonthEnd())
317343

318-
the_union = self.rng.union(rng)
344+
the_union = self.rng.union(rng, sort=sort)
319345
assert isinstance(the_union, DatetimeIndex)
320346

321347
def test_outer_join(self):
@@ -350,16 +376,20 @@ def test_outer_join(self):
350376
assert isinstance(the_join, DatetimeIndex)
351377
assert the_join.freq is None
352378

353-
def test_union_not_cacheable(self):
379+
@pytest.mark.parametrize("sort", [None, False])
380+
def test_union_not_cacheable(self, sort):
354381
rng = date_range('1/1/2000', periods=50, freq=Minute())
355382
rng1 = rng[10:]
356383
rng2 = rng[:25]
357-
the_union = rng1.union(rng2)
358-
tm.assert_index_equal(the_union, rng)
384+
the_union = rng1.union(rng2, sort=sort)
385+
if sort is None:
386+
tm.assert_index_equal(the_union, rng)
387+
else:
388+
assert tm.equalContents(the_union, rng)
359389

360390
rng1 = rng[10:]
361391
rng2 = rng[15:35]
362-
the_union = rng1.union(rng2)
392+
the_union = rng1.union(rng2, sort=sort)
363393
expected = rng[10:]
364394
tm.assert_index_equal(the_union, expected)
365395

@@ -388,7 +418,8 @@ def test_intersection_bug(self):
388418
result = a.intersection(b)
389419
tm.assert_index_equal(result, b)
390420

391-
def test_month_range_union_tz_pytz(self):
421+
@pytest.mark.parametrize("sort", [None, False])
422+
def test_month_range_union_tz_pytz(self, sort):
392423
from pytz import timezone
393424
tz = timezone('US/Eastern')
394425

@@ -403,10 +434,11 @@ def test_month_range_union_tz_pytz(self):
403434
late_dr = date_range(start=late_start, end=late_end, tz=tz,
404435
freq=MonthEnd())
405436

406-
early_dr.union(late_dr)
437+
early_dr.union(late_dr, sort=sort)
407438

408439
@td.skip_if_windows_python_3
409-
def test_month_range_union_tz_dateutil(self):
440+
@pytest.mark.parametrize("sort", [None, False])
441+
def test_month_range_union_tz_dateutil(self, sort):
410442
from pandas._libs.tslibs.timezones import dateutil_gettz
411443
tz = dateutil_gettz('US/Eastern')
412444

@@ -421,43 +453,45 @@ def test_month_range_union_tz_dateutil(self):
421453
late_dr = date_range(start=late_start, end=late_end, tz=tz,
422454
freq=MonthEnd())
423455

424-
early_dr.union(late_dr)
456+
early_dr.union(late_dr, sort=sort)
425457

426458

427459
class TestCustomDatetimeIndex(object):
428460

429461
def setup_method(self, method):
430462
self.rng = bdate_range(START, END, freq='C')
431463

432-
def test_union(self):
464+
@pytest.mark.parametrize("sort", [None, False])
465+
def test_union(self, sort):
433466
# overlapping
434467
left = self.rng[:10]
435468
right = self.rng[5:10]
436469

437-
the_union = left.union(right)
470+
the_union = left.union(right, sort=sort)
438471
assert isinstance(the_union, DatetimeIndex)
439472

440473
# non-overlapping, gap in middle
441474
left = self.rng[:5]
442475
right = self.rng[10:]
443476

444-
the_union = left.union(right)
477+
the_union = left.union(right, sort)
445478
assert isinstance(the_union, Index)
446479

447480
# non-overlapping, no gap
448481
left = self.rng[:5]
449482
right = self.rng[5:10]
450483

451-
the_union = left.union(right)
484+
the_union = left.union(right, sort=sort)
452485
assert isinstance(the_union, DatetimeIndex)
453486

454487
# order does not matter
455-
tm.assert_index_equal(right.union(left), the_union)
488+
if sort is None:
489+
tm.assert_index_equal(right.union(left, sort=sort), the_union)
456490

457491
# overlapping, but different offset
458492
rng = date_range(START, END, freq=BMonthEnd())
459493

460-
the_union = self.rng.union(rng)
494+
the_union = self.rng.union(rng, sort=sort)
461495
assert isinstance(the_union, DatetimeIndex)
462496

463497
def test_outer_join(self):

0 commit comments

Comments
 (0)