Skip to content

Commit 810826d

Browse files
Dr-Irv authored and gfyoung committed
BUG: names on union and intersection for Index were inconsistent (#19849)
Closes gh-9862. xref gh-9943.
1 parent c992fd7 commit 810826d

File tree

13 files changed

+143
-54
lines changed

13 files changed

+143
-54
lines changed

doc/source/whatsnew/v0.24.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -1220,6 +1220,8 @@ Indexing
12201220
- Bug in `scalar in Index` if scalar is a float while the ``Index`` is of integer dtype (:issue:`22085`)
12211221
- Bug in `MultiIndex.set_levels` when levels value is not subscriptable (:issue:`23273`)
12221222
- Bug where setting a timedelta column by ``Index`` causes it to be cast to double, and therefore loses precision (:issue:`23511`)
1223+
- Bug in :func:`Index.union` and :func:`Index.intersection` where the name of the resulting ``Index`` was not computed correctly in certain cases (:issue:`9943`, :issue:`9862`)
1224+
12231225

12241226
Missing
12251227
^^^^^^^

pandas/core/indexes/base.py

+17-22
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
import pandas.core.sorting as sorting
6262
from pandas.io.formats.printing import (
6363
pprint_thing, default_pprint, format_object_summary, format_object_attrs)
64-
from pandas.core.ops import make_invalid_op
64+
from pandas.core.ops import make_invalid_op, get_op_result_name
6565
from pandas.core.strings import StringMethods
6666

6767
__all__ = ['Index']
@@ -1253,7 +1253,7 @@ def _convert_can_do_setop(self, other):
12531253
other = Index(other, name=self.name)
12541254
result_name = self.name
12551255
else:
1256-
result_name = self.name if self.name == other.name else None
1256+
result_name = get_op_result_name(self, other)
12571257
return other, result_name
12581258

12591259
def _convert_for_op(self, value):
@@ -2745,19 +2745,15 @@ def __or__(self, other):
27452745
def __xor__(self, other):
27462746
return self.symmetric_difference(other)
27472747

2748-
def _get_consensus_name(self, other):
2748+
def _get_reconciled_name_object(self, other):
27492749
"""
2750-
Given 2 indexes, give a consensus name meaning
2751-
we take the not None one, or None if the names differ.
2752-
Return a new object if we are resetting the name
2750+
If the result of a set operation will be self,
2751+
return self, unless the name changes, in which
2752+
case make a shallow copy of self.
27532753
"""
2754-
if self.name != other.name:
2755-
if self.name is None or other.name is None:
2756-
name = self.name or other.name
2757-
else:
2758-
name = None
2759-
if self.name != name:
2760-
return self._shallow_copy(name=name)
2754+
name = get_op_result_name(self, other)
2755+
if self.name != name:
2756+
return self._shallow_copy(name=name)
27612757
return self
27622758

27632759
def union(self, other):
@@ -2785,10 +2781,10 @@ def union(self, other):
27852781
other = ensure_index(other)
27862782

27872783
if len(other) == 0 or self.equals(other):
2788-
return self._get_consensus_name(other)
2784+
return self._get_reconciled_name_object(other)
27892785

27902786
if len(self) == 0:
2791-
return other._get_consensus_name(self)
2787+
return other._get_reconciled_name_object(self)
27922788

27932789
# TODO: is_dtype_union_equal is a hack around
27942790
# 1. buggy set ops with duplicates (GH #13432)
@@ -2851,11 +2847,10 @@ def union(self, other):
28512847
stacklevel=3)
28522848

28532849
# for subclasses
2854-
return self._wrap_union_result(other, result)
2850+
return self._wrap_setop_result(other, result)
28552851

2856-
def _wrap_union_result(self, other, result):
2857-
name = self.name if self.name == other.name else None
2858-
return self.__class__(result, name=name)
2852+
def _wrap_setop_result(self, other, result):
2853+
return self._constructor(result, name=get_op_result_name(self, other))
28592854

28602855
def intersection(self, other):
28612856
"""
@@ -2885,7 +2880,7 @@ def intersection(self, other):
28852880
other = ensure_index(other)
28862881

28872882
if self.equals(other):
2888-
return self._get_consensus_name(other)
2883+
return self._get_reconciled_name_object(other)
28892884

28902885
if not is_dtype_equal(self.dtype, other.dtype):
28912886
this = self.astype('O')
@@ -2905,7 +2900,7 @@ def intersection(self, other):
29052900
if self.is_monotonic and other.is_monotonic:
29062901
try:
29072902
result = self._inner_indexer(lvals, rvals)[0]
2908-
return self._wrap_union_result(other, result)
2903+
return self._wrap_setop_result(other, result)
29092904
except TypeError:
29102905
pass
29112906

@@ -4175,7 +4170,7 @@ def _join_monotonic(self, other, how='left', return_indexers=False):
41754170
return join_index
41764171

41774172
def _wrap_joined_index(self, joined, other):
4178-
name = self.name if self.name == other.name else None
4173+
name = get_op_result_name(self, other)
41794174
return Index(joined, name=name)
41804175

41814176
def _get_string_slice(self, key, use_lhs=True, use_rhs=True):

pandas/core/indexes/category.py

+5
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import pandas.core.common as com
2626
import pandas.core.missing as missing
2727
import pandas.core.indexes.base as ibase
28+
from pandas.core.ops import get_op_result_name
2829
from pandas.core.arrays.categorical import Categorical, contains
2930

3031
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
@@ -324,6 +325,10 @@ def itemsize(self):
324325
# Size of the items in categories, not codes.
325326
return self.values.itemsize
326327

328+
def _wrap_setop_result(self, other, result):
329+
name = get_op_result_name(self, other)
330+
return self._shallow_copy(result, name=name)
331+
327332
def get_values(self):
328333
""" return the underlying data as an ndarray """
329334
return self._data.get_values()

pandas/core/indexes/datetimes.py

+13-4
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434

3535
from pandas.core.indexes.base import Index, _index_shared_docs
3636
from pandas.core.indexes.numeric import Int64Index
37+
from pandas.core.ops import get_op_result_name
3738
import pandas.compat as compat
3839
from pandas.tseries.frequencies import to_offset, Resolution
3940
from pandas.core.indexes.datetimelike import (
@@ -592,6 +593,10 @@ def union(self, other):
592593
y : Index or DatetimeIndex
593594
"""
594595
self._assert_can_do_setop(other)
596+
597+
if len(other) == 0 or self.equals(other) or len(self) == 0:
598+
return super(DatetimeIndex, self).union(other)
599+
595600
if not isinstance(other, DatetimeIndex):
596601
try:
597602
other = DatetimeIndex(other)
@@ -674,7 +679,7 @@ def _maybe_utc_convert(self, other):
674679
return this, other
675680

676681
def _wrap_joined_index(self, joined, other):
677-
name = self.name if self.name == other.name else None
682+
name = get_op_result_name(self, other)
678683
if (isinstance(other, DatetimeIndex) and
679684
self.freq == other.freq and
680685
self._can_fast_union(other)):
@@ -745,11 +750,11 @@ def _fast_union(self, other):
745750
else:
746751
return left
747752

748-
def _wrap_union_result(self, other, result):
749-
name = self.name if self.name == other.name else None
753+
def _wrap_setop_result(self, other, result):
754+
name = get_op_result_name(self, other)
750755
if not timezones.tz_compare(self.tz, other.tz):
751756
raise ValueError('Passed item and index have different timezone')
752-
return self._simple_new(result, name=name, freq=None, tz=self.tz)
757+
return self._shallow_copy(result, name=name, freq=None, tz=self.tz)
753758

754759
def intersection(self, other):
755760
"""
@@ -765,6 +770,10 @@ def intersection(self, other):
765770
y : Index or DatetimeIndex
766771
"""
767772
self._assert_can_do_setop(other)
773+
774+
if self.equals(other):
775+
return self._get_reconciled_name_object(other)
776+
768777
if not isinstance(other, DatetimeIndex):
769778
try:
770779
other = DatetimeIndex(other)

pandas/core/indexes/interval.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from pandas.core.indexes.base import (
2626
Index, ensure_index,
2727
default_pprint, _index_shared_docs)
28+
from pandas.core.ops import get_op_result_name
2829

2930
from pandas._libs import Timestamp, Timedelta
3031
from pandas._libs.interval import (
@@ -1048,7 +1049,7 @@ def func(self, other):
10481049
raise TypeError(msg.format(op=op_name))
10491050

10501051
result = getattr(self._multiindex, op_name)(other._multiindex)
1051-
result_name = self.name if self.name == other.name else None
1052+
result_name = get_op_result_name(self, other)
10521053

10531054
# GH 19101: ensure empty results have correct dtype
10541055
if result.empty:

pandas/core/indexes/numeric.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from pandas.util._decorators import Appender, cache_readonly
2222
import pandas.core.dtypes.concat as _concat
2323
import pandas.core.indexes.base as ibase
24-
24+
from pandas.core.ops import get_op_result_name
2525

2626
_num_index_shared_docs = dict()
2727

@@ -215,7 +215,7 @@ def _convert_scalar_indexer(self, key, kind=None):
215215
._convert_scalar_indexer(key, kind=kind))
216216

217217
def _wrap_joined_index(self, joined, other):
218-
name = self.name if self.name == other.name else None
218+
name = get_op_result_name(self, other)
219219
return Int64Index(joined, name=name)
220220

221221
@classmethod
@@ -288,7 +288,7 @@ def _convert_index_indexer(self, keyarr):
288288
return keyarr
289289

290290
def _wrap_joined_index(self, joined, other):
291-
name = self.name if self.name == other.name else None
291+
name = get_op_result_name(self, other)
292292
return UInt64Index(joined, name=name)
293293

294294
@classmethod

pandas/core/indexes/period.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
is_integer_dtype,
1313
is_datetime64_any_dtype,
1414
is_bool_dtype,
15-
pandas_dtype,
15+
pandas_dtype
1616
)
17-
17+
from pandas.core.ops import get_op_result_name
1818
from pandas.core.accessor import PandasDelegate, delegate_names
1919
from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index
2020
from pandas.core.indexes.datetimelike import (
@@ -848,8 +848,8 @@ def _assert_can_do_setop(self, other):
848848
msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
849849
raise IncompatibleFrequency(msg)
850850

851-
def _wrap_union_result(self, other, result):
852-
name = self.name if self.name == other.name else None
851+
def _wrap_setop_result(self, other, result):
852+
name = get_op_result_name(self, other)
853853
result = self._apply_meta(result)
854854
result.name = name
855855
return result

pandas/core/indexes/range.py

+18-8
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import numpy as np
77

8-
from pandas._libs import index as libindex
8+
from pandas._libs import index as libindex, lib
99
import pandas.compat as compat
1010
from pandas.compat import get_range_parameters, lrange, range
1111
from pandas.compat.numpy import function as nv
@@ -263,8 +263,9 @@ def tolist(self):
263263
@Appender(_index_shared_docs['_shallow_copy'])
264264
def _shallow_copy(self, values=None, **kwargs):
265265
if values is None:
266+
name = kwargs.get("name", self.name)
266267
return RangeIndex._simple_new(
267-
name=self.name, **dict(self._get_data_as_items()))
268+
name=name, **dict(self._get_data_as_items()))
268269
else:
269270
kwargs.setdefault('name', self.name)
270271
return self._int64index._shallow_copy(values, **kwargs)
@@ -344,6 +345,10 @@ def intersection(self, other):
344345
-------
345346
intersection : Index
346347
"""
348+
349+
if self.equals(other):
350+
return self._get_reconciled_name_object(other)
351+
347352
if not isinstance(other, RangeIndex):
348353
return super(RangeIndex, self).intersection(other)
349354

@@ -424,10 +429,9 @@ def union(self, other):
424429
union : Index
425430
"""
426431
self._assert_can_do_setop(other)
427-
if len(other) == 0 or self.equals(other):
428-
return self
429-
if len(self) == 0:
430-
return other
432+
if len(other) == 0 or self.equals(other) or len(self) == 0:
433+
return super(RangeIndex, self).union(other)
434+
431435
if isinstance(other, RangeIndex):
432436
start_s, step_s = self._start, self._step
433437
end_s = self._start + self._step * (len(self) - 1)
@@ -498,7 +502,12 @@ def __getitem__(self, key):
498502
super_getitem = super(RangeIndex, self).__getitem__
499503

500504
if is_scalar(key):
501-
n = int(key)
505+
if not lib.is_integer(key):
506+
raise IndexError("only integers, slices (`:`), "
507+
"ellipsis (`...`), numpy.newaxis (`None`) "
508+
"and integer or boolean "
509+
"arrays are valid indices")
510+
n = com.cast_scalar_indexer(key)
502511
if n != key:
503512
return super_getitem(key)
504513
if n < 0:
@@ -649,7 +658,8 @@ def _evaluate_numeric_binop(self, other):
649658
return op(self._int64index, other)
650659
# TODO: Do attrs get handled reliably?
651660

652-
return _evaluate_numeric_binop
661+
name = '__{name}__'.format(name=op.__name__)
662+
return compat.set_function_name(_evaluate_numeric_binop, name, cls)
653663

654664
cls.__add__ = _make_evaluate_binop(operator.add)
655665
cls.__radd__ = _make_evaluate_binop(ops.radd)

pandas/core/indexes/timedeltas.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from pandas.core.base import _shared_docs
2727
from pandas.core.indexes.base import _index_shared_docs
2828
import pandas.core.common as com
29+
from pandas.core.ops import get_op_result_name
2930
import pandas.core.dtypes.concat as _concat
3031
from pandas.util._decorators import Appender, Substitution
3132
from pandas.core.indexes.datetimelike import (
@@ -281,6 +282,10 @@ def union(self, other):
281282
y : Index or TimedeltaIndex
282283
"""
283284
self._assert_can_do_setop(other)
285+
286+
if len(other) == 0 or self.equals(other) or len(self) == 0:
287+
return super(TimedeltaIndex, self).union(other)
288+
284289
if not isinstance(other, TimedeltaIndex):
285290
try:
286291
other = TimedeltaIndex(other)
@@ -313,7 +318,7 @@ def join(self, other, how='left', level=None, return_indexers=False,
313318
sort=sort)
314319

315320
def _wrap_joined_index(self, joined, other):
316-
name = self.name if self.name == other.name else None
321+
name = get_op_result_name(self, other)
317322
if (isinstance(other, TimedeltaIndex) and self.freq == other.freq and
318323
self._can_fast_union(other)):
319324
joined = self._shallow_copy(joined, name=name)
@@ -373,10 +378,6 @@ def _fast_union(self, other):
373378
else:
374379
return left
375380

376-
def _wrap_union_result(self, other, result):
377-
name = self.name if self.name == other.name else None
378-
return self._simple_new(result, name=name, freq=None)
379-
380381
def intersection(self, other):
381382
"""
382383
Specialized intersection for TimedeltaIndex objects. May be much faster
@@ -391,6 +392,10 @@ def intersection(self, other):
391392
y : Index or TimedeltaIndex
392393
"""
393394
self._assert_can_do_setop(other)
395+
396+
if self.equals(other):
397+
return self._get_reconciled_name_object(other)
398+
394399
if not isinstance(other, TimedeltaIndex):
395400
try:
396401
other = TimedeltaIndex(other)

pandas/tests/indexes/common.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -360,10 +360,10 @@ def test_has_duplicates(self, indices):
360360
def test_duplicated(self, indices, keep):
361361
if type(indices) is not self._holder:
362362
pytest.skip('Can only check if we know the index type')
363-
if not len(indices) or isinstance(indices, MultiIndex):
363+
if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)):
364364
# MultiIndex tested separately in:
365365
# tests/indexes/multi/test_unique_and_duplicates
366-
pytest.skip('Skip check for empty Index and MultiIndex')
366+
pytest.skip('Skip check for empty Index, MultiIndex, RangeIndex')
367367

368368
idx = self._holder(indices)
369369
if idx.has_duplicates:

pandas/tests/indexes/conftest.py

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
tm.makeTimedeltaIndex(100),
1616
tm.makeIntIndex(100),
1717
tm.makeUIntIndex(100),
18+
tm.makeRangeIndex(100),
1819
tm.makeFloatIndex(100),
1920
Index([True, False]),
2021
tm.makeCategoricalIndex(100),

0 commit comments

Comments
 (0)