From 7dc16c00a4b7d5a777cb7d8a01ee154ba6606def Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 5 Dec 2020 22:47:55 +0100 Subject: [PATCH 1/7] CLN: Implement multiindex handling for get_op_result_name --- pandas/core/indexes/multi.py | 4 +-- pandas/core/ops/common.py | 40 +++++++++++++++++++++-- pandas/tests/indexes/multi/test_setops.py | 8 +++++ pandas/tests/test_common.py | 23 +++++++++++++ 4 files changed, 70 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index fd47c23b7c92b..20feb4cf9e476 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3614,11 +3614,11 @@ def intersection(self, other, sort=False): """ self._validate_sort_keyword(sort) self._assert_can_do_setop(other) - other, result_names = self._convert_can_do_setop(other) + other, _ = self._convert_can_do_setop(other) if self.equals(other): if self.has_duplicates: - return self.unique().rename(result_names) + return self.unique()._get_reconciled_name_object(other) return self._get_reconciled_name_object(other) return self._intersection(other, sort=sort) diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py index a6bcab44e5519..bd3c3cc1bdf14 100644 --- a/pandas/core/ops/common.py +++ b/pandas/core/ops/common.py @@ -7,7 +7,12 @@ from pandas._libs.lib import item_from_zerodim from pandas._typing import F -from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndexClass, + ABCSeries, + ABCMultiIndex, +) def unpack_zerodim_and_defer(name: str) -> Callable[[F], F]: @@ -82,7 +87,9 @@ def get_op_result_name(left, right): name : object Usually a string """ - if isinstance(right, (ABCSeries, ABCIndexClass)): + if isinstance(left, ABCMultiIndex) and isinstance(right, ABCMultiIndex): + name = _maybe_match_names_multiindex(left, right) + elif isinstance(right, (ABCSeries, ABCIndexClass)): name = _maybe_match_name(left, right) else: name = 
left.name @@ -93,7 +100,7 @@ def _maybe_match_name(a, b): """ Try to find a name to attach to the result of an operation between a and b. If only one of these has a `name` attribute, return that - name. Otherwise return a consensus name if they match of None if + name. Otherwise return a consensus name if they match or None if they have different names. Parameters @@ -122,3 +129,30 @@ def _maybe_match_name(a, b): elif b_has: return b.name return None + + +def _maybe_match_names_multiindex(a, b): + """ + Try to find common names to attach to the result of an operation between + a and b. Return a consensus list of names if they match at least partly + or None if they have completely different names. + + Parameters + ---------- + a : MultiIndex + b : MultiIndex + + Returns + ------- + name : list of optional str or None + """ + if len(a.names) != len(b.names): + return None + names = [] + for a_name, b_name in zip(a.names, b.names): + if a_name == b_name: + names.append(a_name) + else: + # TODO: what if they both have np.nan for their names? 
+ names.append(None) + return names diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 51538c556de15..c17adeaa13ced 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -419,3 +419,11 @@ def test_intersect_with_duplicates(tuples, exp_tuples): result = left.intersection(right) expected = MultiIndex.from_tuples(exp_tuples, names=["first", "second"]) tm.assert_index_equal(result, expected) + + +def test_intersection_different_names(): + # GH# + mi = MultiIndex.from_arrays([[1], [3]], names=["c", "b"]) + mi2 = MultiIndex.from_arrays([[1], [3]]) + result = mi.intersection(mi2) + tm.assert_index_equal(result, mi2) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 8e1186b790e3d..08f20cae0f99f 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -109,6 +109,29 @@ def test_maybe_match_name(left, right, expected): assert ops.common._maybe_match_name(left, right) == expected +@pytest.mark.parametrize( + "data, names, expected", + [ + ((1,), None, None), + ((1,), ["a"], None), + ((1,), ["b"], None), + ((1, 2), ["c", "d"], [None, None]), + ((1, 2), ["b", "a"], [None, None]), + ((1, 2, 3), ["a", "b", "c"], None), + ((1, 2), ["a", "c"], ["a", None]), + ((1, 2), ["c", "b"], [None, "b"]), + ((1, 2), ["a", "b"], ["a", "b"]), + ((1, 2), [None, "b"], [None, "b"]), + ], +) +def test_maybe_match_names_multiindex(data, names, expected): + # GH# + mi = pd.MultiIndex.from_tuples([], names=["a", "b"]) + mi2 = pd.MultiIndex.from_tuples([data], names=names) + result = ops.common._maybe_match_names_multiindex(mi, mi2) + assert result == expected + + def test_standardize_mapping(): # No uninitialized defaultdicts msg = r"to_dict\(\) only accepts initialized defaultdicts" From 911801248e04b53b5c0a14a000b3be59ca2f4feb Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 5 Dec 2020 22:57:55 +0100 Subject: [PATCH 2/7] Change import order --- 
pandas/core/ops/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py index bd3c3cc1bdf14..9336062dde446 100644 --- a/pandas/core/ops/common.py +++ b/pandas/core/ops/common.py @@ -10,8 +10,8 @@ from pandas.core.dtypes.generic import ( ABCDataFrame, ABCIndexClass, - ABCSeries, ABCMultiIndex, + ABCSeries, ) From 9051d13d465829f8ea7fa3c7f26c4e383c75b5e2 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 6 Dec 2020 18:40:00 +0100 Subject: [PATCH 3/7] Override method --- pandas/core/indexes/base.py | 1 - pandas/core/indexes/multi.py | 12 ++++++++++++ pandas/core/ops/common.py | 6 ++---- pandas/tests/indexes/multi/test_setops.py | 2 +- pandas/tests/test_common.py | 4 ++-- 5 files changed, 17 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ba958b23e81af..b197a0d00cbe4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2580,7 +2580,6 @@ def __nonzero__(self): # -------------------------------------------------------------------- # Set Operation Methods - @final def _get_reconciled_name_object(self, other): """ If the result of a set operation will be self, diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 20feb4cf9e476..145c1d341cba1 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -22,6 +22,7 @@ from pandas._libs.hashtable import duplicated_int64 from pandas._typing import AnyArrayLike, DtypeObj, Label, Scalar, Shape from pandas.compat.numpy import function as nv +from pandas.core.ops.common import maybe_match_names_multiindex from pandas.errors import InvalidIndexError, PerformanceWarning, UnsortedIndexError from pandas.util._decorators import Appender, cache_readonly, doc @@ -3591,6 +3592,17 @@ def _union(self, other, sort): def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: return is_object_dtype(dtype) + def _get_reconciled_name_object(self, 
other): + """ + If the result of a set operation will be self, + return self, unless the names change, in which + case make a shallow copy of self. + """ + names = maybe_match_names_multiindex(self, other) + if self.names != names: + return self.rename(names) + return self + def intersection(self, other, sort=False): """ Form the intersection of two MultiIndex objects. diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py index 9336062dde446..d870f2766f11f 100644 --- a/pandas/core/ops/common.py +++ b/pandas/core/ops/common.py @@ -87,9 +87,7 @@ def get_op_result_name(left, right): name : object Usually a string """ - if isinstance(left, ABCMultiIndex) and isinstance(right, ABCMultiIndex): - name = _maybe_match_names_multiindex(left, right) - elif isinstance(right, (ABCSeries, ABCIndexClass)): + if isinstance(right, (ABCSeries, ABCIndexClass)): name = _maybe_match_name(left, right) else: name = left.name @@ -131,7 +129,7 @@ def _maybe_match_name(a, b): return None -def _maybe_match_names_multiindex(a, b): +def maybe_match_names_multiindex(a, b): """ Try to find common names to attach to the result of an operation between a and b. 
Return a consensus list of names if they match at least partly diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index c17adeaa13ced..ac23a5c10e9aa 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -422,7 +422,7 @@ def test_intersect_with_duplicates(tuples, exp_tuples): def test_intersection_different_names(): - # GH# + # GH#38323 mi = MultiIndex.from_arrays([[1], [3]], names=["c", "b"]) mi2 = MultiIndex.from_arrays([[1], [3]]) result = mi.intersection(mi2) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 08f20cae0f99f..d675338f86f20 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -125,10 +125,10 @@ def test_maybe_match_name(left, right, expected): ], ) def test_maybe_match_names_multiindex(data, names, expected): - # GH# + # GH#38323 mi = pd.MultiIndex.from_tuples([], names=["a", "b"]) mi2 = pd.MultiIndex.from_tuples([data], names=names) - result = ops.common._maybe_match_names_multiindex(mi, mi2) + result = ops.common.maybe_match_names_multiindex(mi, mi2) assert result == expected From 64af40489acd1bcce0d00ce26fe043f291654df3 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 6 Dec 2020 18:47:16 +0100 Subject: [PATCH 4/7] Move import --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 145c1d341cba1..914d4384a13aa 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -22,7 +22,6 @@ from pandas._libs.hashtable import duplicated_int64 from pandas._typing import AnyArrayLike, DtypeObj, Label, Scalar, Shape from pandas.compat.numpy import function as nv -from pandas.core.ops.common import maybe_match_names_multiindex from pandas.errors import InvalidIndexError, PerformanceWarning, UnsortedIndexError from pandas.util._decorators import Appender, cache_readonly, doc @@ -57,6 
+56,7 @@ from pandas.core.indexes.frozen import FrozenList from pandas.core.indexes.numeric import Int64Index import pandas.core.missing as missing +from pandas.core.ops.common import maybe_match_names_multiindex from pandas.core.ops.invalid import make_invalid_op from pandas.core.sorting import ( get_group_index, From 18bd99a0e199fff252aadfb888bb914ac54c7fed Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 6 Dec 2020 18:59:48 +0100 Subject: [PATCH 5/7] Remove import --- pandas/core/ops/common.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py index d870f2766f11f..cb26c4f9630cf 100644 --- a/pandas/core/ops/common.py +++ b/pandas/core/ops/common.py @@ -7,12 +7,7 @@ from pandas._libs.lib import item_from_zerodim from pandas._typing import F -from pandas.core.dtypes.generic import ( - ABCDataFrame, - ABCIndexClass, - ABCMultiIndex, - ABCSeries, -) +from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries def unpack_zerodim_and_defer(name: str) -> Callable[[F], F]: From 4647cda41630e6d7d660c376f6a0561595c38a4c Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 7 Dec 2020 20:48:50 +0100 Subject: [PATCH 6/7] Fix merge issue --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index cf95162d137e3..a49f62940f290 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3631,7 +3631,7 @@ def intersection(self, other, sort=False): if self.equals(other): if self.has_duplicates: return self.unique()._get_reconciled_name_object(other) - return self.rename(result_names) + return self._get_reconciled_name_object(other) return self._intersection(other, sort=sort) From 783262b1189422f00bb0b20c072d9fdb5096b616 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 9 Dec 2020 00:55:36 +0100 Subject: [PATCH 7/7] Move methods --- pandas/core/indexes/multi.py | 20 
+++++++++++++++-- pandas/core/ops/common.py | 27 ----------------------- pandas/tests/indexes/multi/test_setops.py | 23 +++++++++++++++++++ pandas/tests/test_common.py | 23 ------------------- 4 files changed, 41 insertions(+), 52 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 2007143b63bb0..713b62b5c39eb 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -55,7 +55,6 @@ ) from pandas.core.indexes.frozen import FrozenList from pandas.core.indexes.numeric import Int64Index -from pandas.core.ops.common import maybe_match_names_multiindex from pandas.core.ops.invalid import make_invalid_op from pandas.core.sorting import ( get_group_index, @@ -3595,11 +3594,28 @@ def _get_reconciled_name_object(self, other): return self, unless the names change, in which case make a shallow copy of self. """ - names = maybe_match_names_multiindex(self, other) + names = self._maybe_match_names(other) if self.names != names: return self.rename(names) return self + def _maybe_match_names(self, other): + """ + Try to find common names to attach to the result of an operation between + self and other. Return a consensus list of names if they match at least partly + or None if they have completely different names. + """ + if len(self.names) != len(other.names): + return None + names = [] + for a_name, b_name in zip(self.names, other.names): + if a_name == b_name: + names.append(a_name) + else: + # TODO: what if they both have np.nan for their names? + names.append(None) + return names + def intersection(self, other, sort=False): """ Form the intersection of two MultiIndex objects. 
diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py index cb26c4f9630cf..58ad3237c8288 100644 --- a/pandas/core/ops/common.py +++ b/pandas/core/ops/common.py @@ -122,30 +122,3 @@ def _maybe_match_name(a, b): elif b_has: return b.name return None - - -def maybe_match_names_multiindex(a, b): - """ - Try to find common names to attach to the result of an operation between - a and b. Return a consensus list of names if they match at least partly - or None if they have completely different names. - - Parameters - ---------- - a : MultiIndex - b : MultiIndex - - Returns - ------- - name : list of optional str or None - """ - if len(a.names) != len(b.names): - return None - names = [] - for a_name, b_name in zip(a.names, b.names): - if a_name == b_name: - names.append(a_name) - else: - # TODO: what if they both have np.nan for their names? - names.append(None) - return names diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 8d1d297f74bdf..f9fc425e46696 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -421,6 +421,29 @@ def test_intersect_with_duplicates(tuples, exp_tuples): tm.assert_index_equal(result, expected) +@pytest.mark.parametrize( + "data, names, expected", + [ + ((1,), None, None), + ((1,), ["a"], None), + ((1,), ["b"], None), + ((1, 2), ["c", "d"], [None, None]), + ((1, 2), ["b", "a"], [None, None]), + ((1, 2, 3), ["a", "b", "c"], None), + ((1, 2), ["a", "c"], ["a", None]), + ((1, 2), ["c", "b"], [None, "b"]), + ((1, 2), ["a", "b"], ["a", "b"]), + ((1, 2), [None, "b"], [None, "b"]), + ], +) +def test_maybe_match_names(data, names, expected): + # GH#38323 + mi = pd.MultiIndex.from_tuples([], names=["a", "b"]) + mi2 = pd.MultiIndex.from_tuples([data], names=names) + result = mi._maybe_match_names(mi2) + assert result == expected + + def test_intersection_equal_different_names(): # GH#30302 mi1 = MultiIndex.from_arrays([[1, 2], [3, 4]], 
names=["c", "b"]) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index d675338f86f20..8e1186b790e3d 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -109,29 +109,6 @@ def test_maybe_match_name(left, right, expected): assert ops.common._maybe_match_name(left, right) == expected -@pytest.mark.parametrize( - "data, names, expected", - [ - ((1,), None, None), - ((1,), ["a"], None), - ((1,), ["b"], None), - ((1, 2), ["c", "d"], [None, None]), - ((1, 2), ["b", "a"], [None, None]), - ((1, 2, 3), ["a", "b", "c"], None), - ((1, 2), ["a", "c"], ["a", None]), - ((1, 2), ["c", "b"], [None, "b"]), - ((1, 2), ["a", "b"], ["a", "b"]), - ((1, 2), [None, "b"], [None, "b"]), - ], -) -def test_maybe_match_names_multiindex(data, names, expected): - # GH#38323 - mi = pd.MultiIndex.from_tuples([], names=["a", "b"]) - mi2 = pd.MultiIndex.from_tuples([data], names=names) - result = ops.common.maybe_match_names_multiindex(mi, mi2) - assert result == expected - - def test_standardize_mapping(): # No uninitialized defaultdicts msg = r"to_dict\(\) only accepts initialized defaultdicts"