Skip to content

Commit aad5884

Browse files
committed
BUG: names on union and intersection for Index were inconsistent (GH9943 GH9862)
1 parent 1e4c50a commit aad5884

File tree

8 files changed

+78
-41
lines changed

8 files changed

+78
-41
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,7 @@ Indexing
837837
- Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`)
838838
- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`)
839839
- Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (:issue:`19726`)
840+
- Bug in :func:`Index.union` and :func:`Index.intersection` where resulting names were not computed correctly for certain cases (:issue:`9943`, :issue:`9862`)
840841

841842

842843
MultiIndex

pandas/core/indexes/base.py

+17-22
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
import pandas.core.algorithms as algos
5858
import pandas.core.sorting as sorting
5959
from pandas.io.formats.printing import pprint_thing
60-
from pandas.core.ops import make_invalid_op
60+
from pandas.core.ops import make_invalid_op, get_op_result_name
6161
from pandas.core.config import get_option
6262
from pandas.core.strings import StringMethods
6363

@@ -1191,7 +1191,7 @@ def _convert_can_do_setop(self, other):
11911191
other = Index(other, name=self.name)
11921192
result_name = self.name
11931193
else:
1194-
result_name = self.name if self.name == other.name else None
1194+
result_name = get_op_result_name(self, other)
11951195
return other, result_name
11961196

11971197
def _convert_for_op(self, value):
@@ -2263,19 +2263,15 @@ def __or__(self, other):
22632263
def __xor__(self, other):
22642264
return self.symmetric_difference(other)
22652265

2266-
def _get_consensus_name(self, other):
2266+
def _get_setop_name_object(self, other):
22672267
"""
2268-
Given 2 indexes, give a consensus name meaning
2269-
we take the not None one, or None if the names differ.
2270-
Return a new object if we are resetting the name
2268+
Given 2 indexes, give a setop name and object, meaning
2269+
we use get_op_result_name to return the name, and then
2270+
return a new object if we are resetting the name
22712271
"""
2272-
if self.name != other.name:
2273-
if self.name is None or other.name is None:
2274-
name = self.name or other.name
2275-
else:
2276-
name = None
2277-
if self.name != name:
2278-
return self._shallow_copy(name=name)
2272+
name = get_op_result_name(self, other)
2273+
if self.name != name:
2274+
return self._shallow_copy(name=name)
22792275
return self
22802276

22812277
def union(self, other):
@@ -2303,10 +2299,10 @@ def union(self, other):
23032299
other = _ensure_index(other)
23042300

23052301
if len(other) == 0 or self.equals(other):
2306-
return self._get_consensus_name(other)
2302+
return self._get_setop_name_object(other)
23072303

23082304
if len(self) == 0:
2309-
return other._get_consensus_name(self)
2305+
return other._get_setop_name_object(self)
23102306

23112307
# TODO: is_dtype_union_equal is a hack around
23122308
# 1. buggy set ops with duplicates (GH #13432)
@@ -2369,11 +2365,10 @@ def union(self, other):
23692365
stacklevel=3)
23702366

23712367
# for subclasses
2372-
return self._wrap_union_result(other, result)
2368+
return self._wrap_setop_result(other, result)
23732369

2374-
def _wrap_union_result(self, other, result):
2375-
name = self.name if self.name == other.name else None
2376-
return self.__class__(result, name=name)
2370+
def _wrap_setop_result(self, other, result):
2371+
return self.__class__(result, name=get_op_result_name(self, other))
23772372

23782373
def intersection(self, other):
23792374
"""
@@ -2403,7 +2398,7 @@ def intersection(self, other):
24032398
other = _ensure_index(other)
24042399

24052400
if self.equals(other):
2406-
return self._get_consensus_name(other)
2401+
return self._get_setop_name_object(other)
24072402

24082403
if not is_dtype_equal(self.dtype, other.dtype):
24092404
this = self.astype('O')
@@ -2423,7 +2418,7 @@ def intersection(self, other):
24232418
if self.is_monotonic and other.is_monotonic:
24242419
try:
24252420
result = self._inner_indexer(lvals, rvals)[0]
2426-
return self._wrap_union_result(other, result)
2421+
return self._wrap_setop_result(other, result)
24272422
except TypeError:
24282423
pass
24292424

@@ -3552,7 +3547,7 @@ def _join_monotonic(self, other, how='left', return_indexers=False):
35523547
return join_index
35533548

35543549
def _wrap_joined_index(self, joined, other):
3555-
name = self.name if self.name == other.name else None
3550+
name = get_op_result_name(self, other)
35563551
return Index(joined, name=name)
35573552

35583553
def _get_string_slice(self, key, use_lhs=True, use_rhs=True):

pandas/core/indexes/datetimes.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838

3939
from pandas.core.indexes.base import Index, _index_shared_docs
4040
from pandas.core.indexes.numeric import Int64Index, Float64Index
41+
from pandas.core.ops import get_op_result_name
4142
import pandas.compat as compat
4243
from pandas.tseries.frequencies import to_offset, get_period_alias, Resolution
4344
from pandas.core.indexes.datetimelike import (
@@ -1237,7 +1238,7 @@ def _maybe_utc_convert(self, other):
12371238
return this, other
12381239

12391240
def _wrap_joined_index(self, joined, other):
1240-
name = self.name if self.name == other.name else None
1241+
name = get_op_result_name(self, other)
12411242
if (isinstance(other, DatetimeIndex) and
12421243
self.offset == other.offset and
12431244
self._can_fast_union(other)):
@@ -1333,8 +1334,8 @@ def __iter__(self):
13331334
box="timestamp")
13341335
return iter(converted)
13351336

1336-
def _wrap_union_result(self, other, result):
1337-
name = self.name if self.name == other.name else None
1337+
def _wrap_setop_result(self, other, result):
1338+
name = get_op_result_name(self, other)
13381339
if not timezones.tz_compare(self.tz, other.tz):
13391340
raise ValueError('Passed item and index have different timezone')
13401341
return self._simple_new(result, name=name, freq=None, tz=self.tz)

pandas/core/indexes/interval.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from pandas.core.indexes.base import (
2727
Index, _ensure_index,
2828
default_pprint, _index_shared_docs)
29+
from pandas.core.ops import get_op_result_name
2930

3031
from pandas._libs import Timestamp, Timedelta
3132
from pandas._libs.interval import (
@@ -1351,7 +1352,7 @@ def func(self, other):
13511352
raise TypeError(msg.format(op=op_name))
13521353

13531354
result = getattr(self._multiindex, op_name)(other._multiindex)
1354-
result_name = self.name if self.name == other.name else None
1355+
result_name = get_op_result_name(self, other)
13551356

13561357
# GH 19101: ensure empty results have correct dtype
13571358
if result.empty:

pandas/core/indexes/numeric.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from pandas.util._decorators import Appender, cache_readonly
1919
import pandas.core.dtypes.concat as _concat
2020
import pandas.core.indexes.base as ibase
21-
21+
from pandas.core.ops import get_op_result_name
2222

2323
_num_index_shared_docs = dict()
2424

@@ -187,7 +187,7 @@ def _convert_scalar_indexer(self, key, kind=None):
187187
._convert_scalar_indexer(key, kind=kind))
188188

189189
def _wrap_joined_index(self, joined, other):
190-
name = self.name if self.name == other.name else None
190+
name = get_op_result_name(self, other)
191191
return Int64Index(joined, name=name)
192192

193193
@classmethod
@@ -264,7 +264,7 @@ def _convert_index_indexer(self, keyarr):
264264
return keyarr
265265

266266
def _wrap_joined_index(self, joined, other):
267-
name = self.name if self.name == other.name else None
267+
name = get_op_result_name(self, other)
268268
return UInt64Index(joined, name=name)
269269

270270
@classmethod

pandas/core/indexes/period.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
_ensure_object)
2020
from pandas.core.dtypes.dtypes import PeriodDtype
2121
from pandas.core.dtypes.generic import ABCSeries
22+
from pandas.core.ops import get_op_result_name
2223

2324
import pandas.tseries.frequencies as frequencies
2425
from pandas.tseries.frequencies import get_freq_code as _gfc
@@ -996,8 +997,8 @@ def _assert_can_do_setop(self, other):
996997
msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
997998
raise IncompatibleFrequency(msg)
998999

999-
def _wrap_union_result(self, other, result):
1000-
name = self.name if self.name == other.name else None
1000+
def _wrap_setop_result(self, other, result):
1001+
name = get_op_result_name(self, other)
10011002
result = self._apply_meta(result)
10021003
result.name = name
10031004
return result

pandas/core/indexes/timedeltas.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from pandas.core.base import _shared_docs
2727
from pandas.core.indexes.base import _index_shared_docs
2828
import pandas.core.common as com
29+
from pandas.core.ops import get_op_result_name
2930
import pandas.core.dtypes.concat as _concat
3031
from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
3132
from pandas.core.indexes.datetimelike import TimelikeOps, DatetimeIndexOpsMixin
@@ -577,7 +578,7 @@ def join(self, other, how='left', level=None, return_indexers=False,
577578
sort=sort)
578579

579580
def _wrap_joined_index(self, joined, other):
580-
name = self.name if self.name == other.name else None
581+
name = get_op_result_name(self, other)
581582
if (isinstance(other, TimedeltaIndex) and self.freq == other.freq and
582583
self._can_fast_union(other)):
583584
joined = self._shallow_copy(joined, name=name)
@@ -637,8 +638,8 @@ def _fast_union(self, other):
637638
else:
638639
return left
639640

640-
def _wrap_union_result(self, other, result):
641-
name = self.name if self.name == other.name else None
641+
def _wrap_setop_result(self, other, result):
642+
name = get_op_result_name(self, other)
642643
return self._simple_new(result, name=name, freq=None)
643644

644645
def intersection(self, other):

pandas/tests/indexes/test_base.py

+44-7
Original file line numberDiff line numberDiff line change
@@ -730,7 +730,15 @@ def test_union(self):
730730
union = Index([]).union(first)
731731
assert union is first
732732

733-
# preserve names
733+
# preserve names only when they are the same
734+
# GH 9943 9862
735+
736+
first = Index(list('ab'), name='A')
737+
second = Index(list('abc'), name='A')
738+
union = first.union(second)
739+
expected = Index(list('abc'), name='A')
740+
tm.assert_index_equal(union, expected)
741+
734742
first = Index(list('ab'), name='A')
735743
second = Index(list('ab'), name='B')
736744
union = first.union(second)
@@ -752,37 +760,66 @@ def test_union(self):
752760
first = Index(list('ab'))
753761
second = Index(list('ab'), name='B')
754762
union = first.union(second)
755-
expected = Index(list('ab'), name='B')
763+
expected = Index(list('ab'), name=None)
764+
tm.assert_index_equal(union, expected)
765+
766+
# GH 9943 9862
767+
first = Index(list('abc'))
768+
second = Index(list('ab'), name='B')
769+
union = first.union(second)
770+
expected = Index(list('abc'), name=None)
756771
tm.assert_index_equal(union, expected)
757772

758773
first = Index([])
759774
second = Index(list('ab'), name='B')
760775
union = first.union(second)
761-
expected = Index(list('ab'), name='B')
776+
expected = Index(list('ab'), name=None)
762777
tm.assert_index_equal(union, expected)
763778

764779
first = Index(list('ab'))
765780
second = Index([], name='B')
766781
union = first.union(second)
767-
expected = Index(list('ab'), name='B')
782+
expected = Index(list('ab'), name=None)
768783
tm.assert_index_equal(union, expected)
769784

770785
first = Index(list('ab'), name='A')
771786
second = Index(list('ab'))
772787
union = first.union(second)
773-
expected = Index(list('ab'), name='A')
788+
expected = Index(list('ab'), name=None)
789+
tm.assert_index_equal(union, expected)
790+
791+
# GH 9943 9862
792+
first = Index(list('ab'), name='A')
793+
second = Index(list('abc'))
794+
union = first.union(second)
795+
expected = Index(list('abc'), name=None)
774796
tm.assert_index_equal(union, expected)
775797

776798
first = Index(list('ab'), name='A')
777799
second = Index([])
778800
union = first.union(second)
779-
expected = Index(list('ab'), name='A')
801+
expected = Index(list('ab'), name=None)
780802
tm.assert_index_equal(union, expected)
781803

782804
first = Index([], name='A')
783805
second = Index(list('ab'))
784806
union = first.union(second)
785-
expected = Index(list('ab'), name='A')
807+
expected = Index(list('ab'), name=None)
808+
tm.assert_index_equal(union, expected)
809+
810+
# Chained unions handle names correctly
811+
i1 = Index([1, 2], name='i1')
812+
i2 = Index([3, 4], name='i2')
813+
i3 = Index([5, 6], name='i3')
814+
union = i1.union(i2.union(i3))
815+
expected = i1.union(i2).union(i3)
816+
tm.assert_index_equal(union, expected)
817+
818+
j1 = Index([1, 2], name='j1')
819+
j2 = Index([], name='j2')
820+
j3 = Index([], name='j3')
821+
union = j1.union(j2.union(j3))
822+
expected = j1.union(j2).union(j3)
786823
tm.assert_index_equal(union, expected)
787824

788825
with tm.assert_produces_warning(RuntimeWarning):

0 commit comments

Comments
 (0)