Skip to content

Commit 63d4590

Browse files
committed
BUG: names on union and intersection for Index were inconsistent (GH9943 GH9862)
1 parent cd1b168 commit 63d4590

File tree

9 files changed

+73
-29
lines changed

9 files changed

+73
-29
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,7 @@ Indexing
796796
- Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`)
797797
- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`)
798798
- Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (:issue:`19726`)
799+
- Bug in :func:`Index.union` where resulting names were not computed correctly (:issue:`9943`, :issue:`9862`)
799800

800801

801802
MultiIndex

pandas/core/indexes/base.py

+39-15
Original file line numberDiff line numberDiff line change
@@ -1121,7 +1121,7 @@ def _convert_can_do_setop(self, other):
11211121
other = Index(other, name=self.name)
11221122
result_name = self.name
11231123
else:
1124-
result_name = self.name if self.name == other.name else None
1124+
result_name = self._get_intersection_name(other)
11251125
return other, result_name
11261126

11271127
def _convert_for_op(self, value):
@@ -2190,19 +2190,35 @@ def __or__(self, other):
21902190
def __xor__(self, other):
21912191
return self.symmetric_difference(other)
21922192

2193-
def _get_consensus_name(self, other):
2193+
def _get_union_name(self, other):
2194+
# GH 9943 9862
21942195
"""
2195-
Given 2 indexes, give a consensus name meaning
2196+
Given 2 indexes, give the union name meaning
21962197
we take the name that is not None, or None if both names are set and differ.
2197-
Return a new object if we are resetting the name
21982198
"""
21992199
if self.name != other.name:
22002200
if self.name is None or other.name is None:
22012201
name = self.name or other.name
22022202
else:
22032203
name = None
2204-
if self.name != name:
2205-
return self._shallow_copy(name=name)
2204+
else:
2205+
name = self.name
2206+
return name
2207+
2208+
def _get_intersection_name(self, other):
2209+
# GH 9943 9862
2210+
return self.name if self.name == other.name else None
2211+
2212+
def _get_consensus_name_object(self, other, name_converter):
2213+
"""
2214+
Given 2 indexes, give a consensus name meaning
2215+
we use the name converter (either _get_union_name or
2216+
_get_intersection_name) to determine the name.
2217+
Return a new object if we are resetting the name
2218+
"""
2219+
name = name_converter(other)
2220+
if self.name != name:
2221+
return self._shallow_copy(name=name)
22062222
return self
22072223

22082224
def union(self, other):
@@ -2230,10 +2246,12 @@ def union(self, other):
22302246
other = _ensure_index(other)
22312247

22322248
if len(other) == 0 or self.equals(other):
2233-
return self._get_consensus_name(other)
2249+
return self._get_consensus_name_object(other,
2250+
self._get_union_name)
22342251

22352252
if len(self) == 0:
2236-
return other._get_consensus_name(self)
2253+
return other._get_consensus_name_object(self,
2254+
other._get_union_name)
22372255

22382256
# TODO: is_dtype_union_equal is a hack around
22392257
# 1. buggy set ops with duplicates (GH #13432)
@@ -2296,11 +2314,15 @@ def union(self, other):
22962314
stacklevel=3)
22972315

22982316
# for subclasses
2299-
return self._wrap_union_result(other, result)
2317+
return self._wrap_setop_result(other, result, self._get_union_name)
23002318

2301-
def _wrap_union_result(self, other, result):
2302-
name = self.name if self.name == other.name else None
2303-
return self.__class__(result, name=name)
2319+
def _wrap_setop_result(self, other, result, name_func):
2320+
# GH 9943 9862
2321+
"""
2322+
name_func is either self._get_union_name or
2323+
self._get_intersection_name
2324+
"""
2325+
return self.__class__(result, name=name_func(other))
23042326

23052327
def intersection(self, other):
23062328
"""
@@ -2330,7 +2352,8 @@ def intersection(self, other):
23302352
other = _ensure_index(other)
23312353

23322354
if self.equals(other):
2333-
return self._get_consensus_name(other)
2355+
return self._get_consensus_name_object(other,
2356+
self._get_intersection_name)
23342357

23352358
if not is_dtype_equal(self.dtype, other.dtype):
23362359
this = self.astype('O')
@@ -2350,7 +2373,8 @@ def intersection(self, other):
23502373
if self.is_monotonic and other.is_monotonic:
23512374
try:
23522375
result = self._inner_indexer(lvals, rvals)[0]
2353-
return self._wrap_union_result(other, result)
2376+
return self._wrap_setop_result(other, result,
2377+
self._get_intersection_name)
23542378
except TypeError:
23552379
pass
23562380

@@ -3479,7 +3503,7 @@ def _join_monotonic(self, other, how='left', return_indexers=False):
34793503
return join_index
34803504

34813505
def _wrap_joined_index(self, joined, other):
3482-
name = self.name if self.name == other.name else None
3506+
name = self._get_intersection_name(other)
34833507
return Index(joined, name=name)
34843508

34853509
def _get_string_slice(self, key, use_lhs=True, use_rhs=True):

pandas/core/indexes/datetimes.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1245,7 +1245,7 @@ def _maybe_utc_convert(self, other):
12451245
return this, other
12461246

12471247
def _wrap_joined_index(self, joined, other):
1248-
name = self.name if self.name == other.name else None
1248+
name = self._get_intersection_name(other)
12491249
if (isinstance(other, DatetimeIndex) and
12501250
self.offset == other.offset and
12511251
self._can_fast_union(other)):
@@ -1341,8 +1341,8 @@ def __iter__(self):
13411341
box="timestamp")
13421342
return iter(converted)
13431343

1344-
def _wrap_union_result(self, other, result):
1345-
name = self.name if self.name == other.name else None
1344+
def _wrap_setop_result(self, other, result, name_func):
1345+
name = name_func(other)
13461346
if not timezones.tz_compare(self.tz, other.tz):
13471347
raise ValueError('Passed item and index have different timezone')
13481348
return self._simple_new(result, name=name, freq=None, tz=self.tz)

pandas/core/indexes/interval.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1351,7 +1351,10 @@ def func(self, other):
13511351
raise TypeError(msg.format(op=op_name))
13521352

13531353
result = getattr(self._multiindex, op_name)(other._multiindex)
1354-
result_name = self.name if self.name == other.name else None
1354+
if op_name == 'union':
1355+
result_name = self._get_union_name(other)
1356+
else:
1357+
result_name = self._get_intersection_name(other)
13551358

13561359
# GH 19101: ensure empty results have correct dtype
13571360
if result.empty:

pandas/core/indexes/numeric.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ def _convert_scalar_indexer(self, key, kind=None):
187187
._convert_scalar_indexer(key, kind=kind))
188188

189189
def _wrap_joined_index(self, joined, other):
190-
name = self.name if self.name == other.name else None
190+
name = self._get_intersection_name(other)
191191
return Int64Index(joined, name=name)
192192

193193
@classmethod
@@ -264,7 +264,7 @@ def _convert_index_indexer(self, keyarr):
264264
return keyarr
265265

266266
def _wrap_joined_index(self, joined, other):
267-
name = self.name if self.name == other.name else None
267+
name = self._get_intersection_name(other)
268268
return UInt64Index(joined, name=name)
269269

270270
@classmethod

pandas/core/indexes/period.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -998,8 +998,8 @@ def _assert_can_do_setop(self, other):
998998
msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
999999
raise IncompatibleFrequency(msg)
10001000

1001-
def _wrap_union_result(self, other, result):
1002-
name = self.name if self.name == other.name else None
1001+
def _wrap_setop_result(self, other, result, name_func):
1002+
name = name_func(other)
10031003
result = self._apply_meta(result)
10041004
result.name = name
10051005
return result

pandas/core/indexes/timedeltas.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -586,7 +586,7 @@ def join(self, other, how='left', level=None, return_indexers=False,
586586
sort=sort)
587587

588588
def _wrap_joined_index(self, joined, other):
589-
name = self.name if self.name == other.name else None
589+
name = self._get_intersection_name(other)
590590
if (isinstance(other, TimedeltaIndex) and self.freq == other.freq and
591591
self._can_fast_union(other)):
592592
joined = self._shallow_copy(joined, name=name)
@@ -646,8 +646,8 @@ def _fast_union(self, other):
646646
else:
647647
return left
648648

649-
def _wrap_union_result(self, other, result):
650-
name = self.name if self.name == other.name else None
649+
def _wrap_setop_result(self, other, result, name_func):
650+
name = name_func(other)
651651
return self._simple_new(result, name=name, freq=None)
652652

653653
def intersection(self, other):

pandas/tests/indexes/test_base.py

+14
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,13 @@ def test_union(self):
754754
expected = Index(list('ab'), name='B')
755755
tm.assert_index_equal(union, expected)
756756

757+
# GH 9943 9862
758+
first = Index(list('abc'))
759+
second = Index(list('ab'), name='B')
760+
union = first.union(second)
761+
expected = Index(list('abc'), name='B')
762+
tm.assert_index_equal(union, expected)
763+
757764
first = Index([])
758765
second = Index(list('ab'), name='B')
759766
union = first.union(second)
@@ -772,6 +779,13 @@ def test_union(self):
772779
expected = Index(list('ab'), name='A')
773780
tm.assert_index_equal(union, expected)
774781

782+
# GH 9943 9862
783+
first = Index(list('ab'), name='A')
784+
second = Index(list('abc'))
785+
union = first.union(second)
786+
expected = Index(list('abc'), name='A')
787+
tm.assert_index_equal(union, expected)
788+
775789
first = Index(list('ab'), name='A')
776790
second = Index([])
777791
union = first.union(second)

pandas/tests/indexes/test_range.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ def test_join_outer(self):
432432
tm.assert_index_equal(res, noidx_res)
433433

434434
eres = Int64Index([0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20,
435-
21, 22, 23, 24, 25])
435+
21, 22, 23, 24, 25], name=self.index.name)
436436
elidx = np.array([0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9,
437437
-1, -1, -1, -1, -1, -1, -1], dtype=np.intp)
438438
eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6,
@@ -667,11 +667,13 @@ def test_union_noncomparable(self):
667667
now = datetime.now()
668668
other = Index([now + timedelta(i) for i in range(4)], dtype=object)
669669
result = self.index.union(other)
670-
expected = Index(np.concatenate((self.index, other)))
670+
expected = Index(np.concatenate((self.index, other)),
671+
name=self.index.name)
671672
tm.assert_index_equal(result, expected)
672673

673674
result = other.union(self.index)
674-
expected = Index(np.concatenate((other, self.index)))
675+
expected = Index(np.concatenate((other, self.index)),
676+
name=self.index.name)
675677
tm.assert_index_equal(result, expected)
676678

677679
def test_union(self):

0 commit comments

Comments
 (0)