Skip to content

Commit a67279d

Browse files
jbrockmendel authored and luckyvs1 committed
REF: share Index.union (pandas-dev#38671)
* BUG: MultiIndex, IntervalIndex intersection with Categorical
* standardize
* Share intersection
* REF: share Index.union
* move doctest examples
1 parent 1dd97e2 commit a67279d

File tree

5 files changed

+58
-112
lines changed

5 files changed

+58
-112
lines changed

pandas/core/indexes/base.py

+55-6
Original file line numberDiff line numberDiff line change
@@ -2656,12 +2656,59 @@ def union(self, other, sort=None):
26562656
>>> idx2 = pd.Index([1, 2, 3, 4])
26572657
>>> idx1.union(idx2)
26582658
Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object')
2659+
2660+
MultiIndex case
2661+
2662+
>>> idx1 = pd.MultiIndex.from_arrays(
2663+
... [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]]
2664+
... )
2665+
>>> idx1
2666+
MultiIndex([(1, 'Red'),
2667+
(1, 'Blue'),
2668+
(2, 'Red'),
2669+
(2, 'Blue')],
2670+
)
2671+
>>> idx2 = pd.MultiIndex.from_arrays(
2672+
... [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]]
2673+
... )
2674+
>>> idx2
2675+
MultiIndex([(3, 'Red'),
2676+
(3, 'Green'),
2677+
(2, 'Red'),
2678+
(2, 'Green')],
2679+
)
2680+
>>> idx1.union(idx2)
2681+
MultiIndex([(1, 'Blue'),
2682+
(1, 'Red'),
2683+
(2, 'Blue'),
2684+
(2, 'Green'),
2685+
(2, 'Red'),
2686+
(3, 'Green'),
2687+
(3, 'Red')],
2688+
)
2689+
>>> idx1.union(idx2, sort=False)
2690+
MultiIndex([(1, 'Red'),
2691+
(1, 'Blue'),
2692+
(2, 'Red'),
2693+
(2, 'Blue'),
2694+
(3, 'Red'),
2695+
(3, 'Green'),
2696+
(2, 'Green')],
2697+
)
26592698
"""
26602699
self._validate_sort_keyword(sort)
26612700
self._assert_can_do_setop(other)
26622701
other, result_name = self._convert_can_do_setop(other)
26632702

26642703
if not is_dtype_equal(self.dtype, other.dtype):
2704+
if isinstance(self, ABCMultiIndex) and not is_object_dtype(
2705+
unpack_nested_dtype(other)
2706+
):
2707+
raise NotImplementedError(
2708+
"Can only union MultiIndex with MultiIndex or Index of tuples, "
2709+
"try mi.to_flat_index().union(other) instead."
2710+
)
2711+
26652712
dtype = find_common_type([self.dtype, other.dtype])
26662713
if self._is_numeric_dtype and other._is_numeric_dtype:
26672714
# Right now, we treat union(int, float) a bit special.
@@ -2680,6 +2727,14 @@ def union(self, other, sort=None):
26802727
right = other.astype(dtype, copy=False)
26812728
return left.union(right, sort=sort)
26822729

2730+
elif not len(other) or self.equals(other):
2731+
# NB: whether this (and the `if not len(self)` check below) come before
2732+
# or after the is_dtype_equal check above affects the returned dtype
2733+
return self._get_reconciled_name_object(other)
2734+
2735+
elif not len(self):
2736+
return other._get_reconciled_name_object(self)
2737+
26832738
result = self._union(other, sort=sort)
26842739

26852740
return self._wrap_setop_result(other, result)
@@ -2703,12 +2758,6 @@ def _union(self, other, sort):
27032758
-------
27042759
Index
27052760
"""
2706-
if not len(other) or self.equals(other):
2707-
return self
2708-
2709-
if not len(self):
2710-
return other
2711-
27122761
# TODO(EA): setops-refactor, clean all this up
27132762
lvals = self._values
27142763
rvals = other._values

pandas/core/indexes/datetimelike.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -711,6 +711,9 @@ def _can_fast_intersect(self: _T, other: _T) -> bool:
711711
# so intersection will preserve freq
712712
return True
713713

714+
elif not len(self) or not len(other):
715+
return False
716+
714717
elif isinstance(self.freq, Tick):
715718
# We "line up" if and only if the difference between two of our points
716719
# is a multiple of our freq
@@ -794,9 +797,6 @@ def _fast_union(self, other, sort=None):
794797
return left
795798

796799
def _union(self, other, sort):
797-
if not len(other) or self.equals(other) or not len(self):
798-
return super()._union(other, sort=sort)
799-
800800
# We are called by `union`, which is responsible for this validation
801801
assert isinstance(other, type(self))
802802

pandas/core/indexes/multi.py

-89
Original file line numberDiff line numberDiff line change
@@ -3502,98 +3502,9 @@ def equal_levels(self, other) -> bool:
35023502
# --------------------------------------------------------------------
35033503
# Set Methods
35043504

3505-
def union(self, other, sort=None):
3506-
"""
3507-
Form the union of two MultiIndex objects
3508-
3509-
Parameters
3510-
----------
3511-
other : MultiIndex or array / Index of tuples
3512-
sort : False or None, default None
3513-
Whether to sort the resulting Index.
3514-
3515-
* None : Sort the result, except when
3516-
3517-
1. `self` and `other` are equal.
3518-
2. `self` has length 0.
3519-
3. Some values in `self` or `other` cannot be compared.
3520-
A RuntimeWarning is issued in this case.
3521-
3522-
* False : do not sort the result.
3523-
3524-
.. versionadded:: 0.24.0
3525-
3526-
.. versionchanged:: 0.24.1
3527-
3528-
Changed the default value from ``True`` to ``None``
3529-
(without change in behaviour).
3530-
3531-
Returns
3532-
-------
3533-
Index
3534-
3535-
Examples
3536-
--------
3537-
>>> idx1 = pd.MultiIndex.from_arrays(
3538-
... [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]]
3539-
... )
3540-
>>> idx1
3541-
MultiIndex([(1, 'Red'),
3542-
(1, 'Blue'),
3543-
(2, 'Red'),
3544-
(2, 'Blue')],
3545-
)
3546-
>>> idx2 = pd.MultiIndex.from_arrays(
3547-
... [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]]
3548-
... )
3549-
>>> idx2
3550-
MultiIndex([(3, 'Red'),
3551-
(3, 'Green'),
3552-
(2, 'Red'),
3553-
(2, 'Green')],
3554-
)
3555-
3556-
>>> idx1.union(idx2)
3557-
MultiIndex([(1, 'Blue'),
3558-
(1, 'Red'),
3559-
(2, 'Blue'),
3560-
(2, 'Green'),
3561-
(2, 'Red'),
3562-
(3, 'Green'),
3563-
(3, 'Red')],
3564-
)
3565-
3566-
>>> idx1.union(idx2, sort=False)
3567-
MultiIndex([(1, 'Red'),
3568-
(1, 'Blue'),
3569-
(2, 'Red'),
3570-
(2, 'Blue'),
3571-
(3, 'Red'),
3572-
(3, 'Green'),
3573-
(2, 'Green')],
3574-
)
3575-
"""
3576-
self._validate_sort_keyword(sort)
3577-
self._assert_can_do_setop(other)
3578-
other, _ = self._convert_can_do_setop(other)
3579-
3580-
if not len(other) or self.equals(other):
3581-
return self._get_reconciled_name_object(other)
3582-
3583-
if not len(self):
3584-
return other._get_reconciled_name_object(self)
3585-
3586-
return self._union(other, sort=sort)
3587-
35883505
def _union(self, other, sort):
35893506
other, result_names = self._convert_can_do_setop(other)
35903507

3591-
if not self._should_compare(other):
3592-
raise NotImplementedError(
3593-
"Can only union MultiIndex with MultiIndex or Index of tuples, "
3594-
"try mi.to_flat_index().union(other) instead."
3595-
)
3596-
35973508
# We could get here with CategoricalIndex other
35983509
rvals = other._values.astype(object, copy=False)
35993510
uniq_tuples = lib.fast_unique_multiple([self._values, rvals], sort=sort)

pandas/core/indexes/period.py

-11
Original file line numberDiff line numberDiff line change
@@ -646,17 +646,6 @@ def _difference(self, other, sort):
646646
return self._setop(other, sort, opname="difference")
647647

648648
def _union(self, other, sort):
649-
if not len(other) or self.equals(other) or not len(self):
650-
return super()._union(other, sort=sort)
651-
652-
# We are called by `union`, which is responsible for this validation
653-
assert isinstance(other, type(self))
654-
655-
if not is_dtype_equal(self.dtype, other.dtype):
656-
this = self.astype("O")
657-
other = other.astype("O")
658-
return this._union(other, sort=sort)
659-
660649
return self._setop(other, sort, opname="_union")
661650

662651
# ------------------------------------------------------------------------

pandas/core/indexes/range.py

-3
Original file line numberDiff line numberDiff line change
@@ -576,9 +576,6 @@ def _union(self, other, sort):
576576
-------
577577
union : Index
578578
"""
579-
if not len(other) or self.equals(other) or not len(self):
580-
return super()._union(other, sort=sort)
581-
582579
if isinstance(other, RangeIndex) and sort is None:
583580
start_s, step_s = self.start, self.step
584581
end_s = self.start + self.step * (len(self) - 1)

0 commit comments

Comments
 (0)