From aad588429c0cea52c5aeb660b66fb106ab35fb02 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Thu, 22 Feb 2018 16:59:17 -0500 Subject: [PATCH 01/12] BUG: names on union and intersection for Index were inconsistent (GH9943 GH9862) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/indexes/base.py | 39 +++++++++++------------ pandas/core/indexes/datetimes.py | 7 +++-- pandas/core/indexes/interval.py | 3 +- pandas/core/indexes/numeric.py | 6 ++-- pandas/core/indexes/period.py | 5 +-- pandas/core/indexes/timedeltas.py | 7 +++-- pandas/tests/indexes/test_base.py | 51 ++++++++++++++++++++++++++----- 8 files changed, 78 insertions(+), 41 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index fb22dc40e335f..a3cbcdd016355 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -837,6 +837,7 @@ Indexing - Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`) - Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`) - Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (issue:`19726`) +- Bug in :func:`Index.union` where resulting names were not computed correctly for certain cases (:issue:`9943`, :issue:`9862`) MultiIndex diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0813c12d573d5..8dfcdb1da28ce 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -57,7 +57,7 @@ import pandas.core.algorithms as algos import pandas.core.sorting as sorting from pandas.io.formats.printing import pprint_thing -from pandas.core.ops import make_invalid_op +from pandas.core.ops import make_invalid_op, get_op_result_name from pandas.core.config import get_option from pandas.core.strings import StringMethods @@ -1191,7 +1191,7 @@ def _convert_can_do_setop(self, other): other = Index(other, name=self.name) result_name = self.name else: - result_name = self.name if self.name == other.name else None + result_name = get_op_result_name(self, other) return other, result_name def _convert_for_op(self, value): @@ -2263,19 +2263,15 @@ def __or__(self, other): def __xor__(self, other): return self.symmetric_difference(other) - def _get_consensus_name(self, other): + def _get_setop_name_object(self, other): """ - Given 2 indexes, give a consensus name meaning - we take the not None one, or None if the names differ. - Return a new object if we are resetting the name + Given 2 indexes, give a setop name and object, meaning + we use get_op_result_name to return the name, and then + return a new object if we are resetting the name """ - if self.name != other.name: - if self.name is None or other.name is None: - name = self.name or other.name - else: - name = None - if self.name != name: - return self._shallow_copy(name=name) + name = get_op_result_name(self, other) + if self.name != name: + return self._shallow_copy(name=name) return self def union(self, other): @@ -2303,10 +2299,10 @@ def union(self, other): other = _ensure_index(other) if len(other) == 0 or self.equals(other): - return self._get_consensus_name(other) + return self._get_setop_name_object(other) if len(self) == 0: - return other._get_consensus_name(self) + return other._get_setop_name_object(self) # TODO: is_dtype_union_equal is a hack around # 1. buggy set ops with duplicates (GH #13432) @@ -2369,11 +2365,10 @@ def union(self, other): stacklevel=3) # for subclasses - return self._wrap_union_result(other, result) + return self._wrap_setop_result(other, result) - def _wrap_union_result(self, other, result): - name = self.name if self.name == other.name else None - return self.__class__(result, name=name) + def _wrap_setop_result(self, other, result): + return self.__class__(result, name=get_op_result_name(self, other)) def intersection(self, other): """ @@ -2403,7 +2398,7 @@ def intersection(self, other): other = _ensure_index(other) if self.equals(other): - return self._get_consensus_name(other) + return self._get_setop_name_object(other) if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') @@ -2423,7 +2418,7 @@ def intersection(self, other): if self.is_monotonic and other.is_monotonic: try: result = self._inner_indexer(lvals, rvals)[0] - return self._wrap_union_result(other, result) + return self._wrap_setop_result(other, result) except TypeError: pass @@ -3552,7 +3547,7 @@ def _join_monotonic(self, other, how='left', return_indexers=False): return join_index def _wrap_joined_index(self, joined, other): - name = self.name if self.name == other.name else None + name = get_op_result_name(self, other) return Index(joined, name=name) def _get_string_slice(self, key, use_lhs=True, use_rhs=True): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 55d8b7c18a622..19203f4142567 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -38,6 +38,7 @@ from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.indexes.numeric import Int64Index, Float64Index +from pandas.core.ops import get_op_result_name import pandas.compat as compat from pandas.tseries.frequencies import to_offset, get_period_alias, Resolution from pandas.core.indexes.datetimelike import ( @@ -1237,7 +1238,7 @@ def _maybe_utc_convert(self, other): return this, other def _wrap_joined_index(self, joined, other): - name = self.name if self.name == other.name else None + name = get_op_result_name(self, other) if (isinstance(other, DatetimeIndex) and self.offset == other.offset and self._can_fast_union(other)): @@ -1333,8 +1334,8 @@ def __iter__(self): box="timestamp") return iter(converted) - def _wrap_union_result(self, other, result): - name = self.name if self.name == other.name else None + def _wrap_setop_result(self, other, result): + name = get_op_result_name(self, other) if not timezones.tz_compare(self.tz, other.tz): raise ValueError('Passed item and index have different timezone') return self._simple_new(result, name=name, freq=None, tz=self.tz) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index d431ea1e51e31..46a5ada60af90 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -26,6 +26,7 @@ from pandas.core.indexes.base import ( Index, _ensure_index, default_pprint, _index_shared_docs) +from pandas.core.ops import get_op_result_name from pandas._libs import Timestamp, Timedelta from pandas._libs.interval import ( @@ -1351,7 +1352,7 @@ def func(self, other): raise TypeError(msg.format(op=op_name)) result = getattr(self._multiindex, op_name)(other._multiindex) - result_name = self.name if self.name == other.name else None + result_name = get_op_result_name(self, other) # GH 19101: ensure empty results have correct dtype if result.empty: diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index a4558116bfa63..81a1c344d7fe9 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -18,7 +18,7 @@ from pandas.util._decorators import Appender, cache_readonly import pandas.core.dtypes.concat as _concat import pandas.core.indexes.base as ibase - +from pandas.core.ops import get_op_result_name _num_index_shared_docs = dict() @@ -187,7 +187,7 @@ def _convert_scalar_indexer(self, key, kind=None): ._convert_scalar_indexer(key, kind=kind)) def _wrap_joined_index(self, joined, other): - name = self.name if self.name == other.name else None + name = get_op_result_name(self, other) return Int64Index(joined, name=name) @classmethod @@ -264,7 +264,7 @@ def _convert_index_indexer(self, keyarr): return keyarr def _wrap_joined_index(self, joined, other): - name = self.name if self.name == other.name else None + name = get_op_result_name(self, other) return UInt64Index(joined, name=name) @classmethod diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index f0567c9c963af..4f4f222e4c137 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -19,6 +19,7 @@ _ensure_object) from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.dtypes.generic import ABCSeries +from pandas.core.ops import get_op_result_name import pandas.tseries.frequencies as frequencies from pandas.tseries.frequencies import get_freq_code as _gfc @@ -996,8 +997,8 @@ def _assert_can_do_setop(self, other): msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - def _wrap_union_result(self, other, result): - name = self.name if self.name == other.name else None + def _wrap_setop_result(self, other, result): + name = get_op_result_name(self, other) result = self._apply_meta(result) result.name = name return result diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index eebd52d7fb801..9b3b179f94dc3 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -26,6 +26,7 @@ from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs import pandas.core.common as com +from pandas.core.ops import get_op_result_name import pandas.core.dtypes.concat as _concat from pandas.util._decorators import Appender, Substitution, deprecate_kwarg from pandas.core.indexes.datetimelike import TimelikeOps, DatetimeIndexOpsMixin @@ -577,7 +578,7 @@ def join(self, other, how='left', level=None, return_indexers=False, sort=sort) def _wrap_joined_index(self, joined, other): - name = self.name if self.name == other.name else None + name = get_op_result_name(self, other) if (isinstance(other, TimedeltaIndex) and self.freq == other.freq and self._can_fast_union(other)): joined = self._shallow_copy(joined, name=name) @@ -637,8 +638,8 @@ def _fast_union(self, other): else: return left - def _wrap_union_result(self, other, result): - name = self.name if self.name == other.name else None + def _wrap_setop_result(self, other, result): + name = get_op_result_name(self, other) return self._simple_new(result, name=name, freq=None) def intersection(self, other): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index d7f185853ca45..afc8c78bfc996 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -730,7 +730,15 @@ def test_union(self): union = Index([]).union(first) assert union is first - # preserve names + # preserve names only when they are the same + # GH 9943 9862 + + first = Index(list('ab'), name='A') + second = Index(list('abc'), name='A') + union = first.union(second) + expected = Index(list('abc'), name='A') + tm.assert_index_equal(union, expected) + first = Index(list('ab'), name='A') second = Index(list('ab'), name='B') union = first.union(second) @@ -752,37 +760,66 @@ def test_union(self): first = Index(list('ab')) second = Index(list('ab'), name='B') union = first.union(second) - expected = Index(list('ab'), name='B') + expected = Index(list('ab'), name=None) + tm.assert_index_equal(union, expected) + + # GH 9943 9862 + first = Index(list('abc')) + second = Index(list('ab'), name='B') + union = first.union(second) + expected = Index(list('abc'), name=None) tm.assert_index_equal(union, expected) first = Index([]) second = Index(list('ab'), name='B') union = first.union(second) - expected = Index(list('ab'), name='B') + expected = Index(list('ab'), name=None) tm.assert_index_equal(union, expected) first = Index(list('ab')) second = Index([], name='B') union = first.union(second) - expected = Index(list('ab'), name='B') + expected = Index(list('ab'), name=None) tm.assert_index_equal(union, expected) first = Index(list('ab'), name='A') second = Index(list('ab')) union = first.union(second) - expected = Index(list('ab'), name='A') + expected = Index(list('ab'), name=None) + tm.assert_index_equal(union, expected) + + # GH 9943 9862 + first = Index(list('ab'), name='A') + second = Index(list('abc')) + union = first.union(second) + expected = Index(list('abc'), name=None) tm.assert_index_equal(union, expected) first = Index(list('ab'), name='A') second = Index([]) union = first.union(second) - expected = Index(list('ab'), name='A') + expected = Index(list('ab'), name=None) tm.assert_index_equal(union, expected) first = Index([], name='A') second = Index(list('ab')) union = first.union(second) - expected = Index(list('ab'), name='A') + expected = Index(list('ab'), name=None) + tm.assert_index_equal(union, expected) + + # Chained unions handles names correctly + i1 = Index([1, 2], name='i1') + i2 = Index([3, 4], name='i2') + i3 = Index([5, 6], name='i3') + union = i1.union(i2.union(i3)) + expected = i1.union(i2).union(i3) + tm.assert_index_equal(union, expected) + + j1 = Index([1, 2], name='j1') + j2 = Index([], name='j2') + j3 = Index([], name='j3') + union = j1.union(j2.union(j3)) + expected = j1.union(j2).union(j3) tm.assert_index_equal(union, expected) with tm.assert_produces_warning(RuntimeWarning): From 3ef2a693479376125cce4af2e952678aa3b325df Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Thu, 1 Mar 2018 13:50:31 -0500 Subject: [PATCH 02/12] Use _shallow_copy(). Fix issue in Index.difference. Make tests try variety of index types. --- doc/source/whatsnew/v0.23.0.txt | 3 +- pandas/core/indexes/base.py | 51 +++++-- pandas/core/indexes/category.py | 14 ++ pandas/core/indexes/datetimes.py | 9 ++ pandas/core/indexes/multi.py | 4 +- pandas/core/indexes/period.py | 6 + pandas/core/indexes/range.py | 21 ++- pandas/core/indexes/timedeltas.py | 13 +- pandas/tests/indexes/test_base.py | 208 +++++++++++++++------------- pandas/tests/reshape/test_concat.py | 8 +- 10 files changed, 215 insertions(+), 122 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a3cbcdd016355..50a1a9f62c59b 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -837,7 +837,8 @@ Indexing - Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`) - Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`) - Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (issue:`19726`) -- Bug in :func:`Index.union` where resulting names were not computed correctly for certain cases (:issue:`9943`, :issue:`9862`) +- Bug in :func:`Index.union` and :func:`Index.intersection` where name of the `Index` of the result was not computed correctly for certain cases (:issue:`9943`, :issue:`9862`) +- Bug in :func:`Index.difference` when taking difference of set and itself as type was not preserved. MultiIndex diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8dfcdb1da28ce..ab0f3665db9df 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1240,9 +1240,9 @@ def set_names(self, names, level=None, inplace=False): Examples -------- >>> Index([1, 2, 3, 4]).set_names('foo') - Int64Index([1, 2, 3, 4], dtype='int64') + Int64Index([1, 2, 3, 4], dtype='int64', name='foo') >>> Index([1, 2, 3, 4]).set_names(['foo']) - Int64Index([1, 2, 3, 4], dtype='int64') + Int64Index([1, 2, 3, 4], dtype='int64', name='foo') >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'), (2, u'one'), (2, u'two')], names=['foo', 'bar']) @@ -2263,17 +2263,37 @@ def __or__(self, other): def __xor__(self, other): return self.symmetric_difference(other) - def _get_setop_name_object(self, other): + def _get_reconciled_name_object(self, other): """ - Given 2 indexes, give a setop name and object, meaning - we use get_op_result_name to return the name, and then - return a new object if we are resetting the name + If the result of a set operation will be self, + return self, unless the name changes, in which + case make a shallow copy of self. """ name = get_op_result_name(self, other) if self.name != name: return self._shallow_copy(name=name) return self + def _union_corner_case(self, other): + """ + If self or other have no length, or self and other + are the same, then return self, after reconciling names + + Returns + ------- + Tuple of (is_corner, result), where is_corner is True if + it is a corner case, and result is the reconciled result + + """ + # GH 9943 9862 + if len(other) == 0 or self.equals(other): + return (True, self._get_reconciled_name_object(other)) + + if len(self) == 0: + return (True, other._get_reconciled_name_object(self)) + + return (False, None) + def union(self, other): """ Form the union of two Index objects and sorts if possible. @@ -2298,11 +2318,9 @@ def union(self, other): self._assert_can_do_setop(other) other = _ensure_index(other) - if len(other) == 0 or self.equals(other): - return self._get_setop_name_object(other) - - if len(self) == 0: - return other._get_setop_name_object(self) + is_corner_case, corner_result = self._union_corner_case(other) + if is_corner_case: + return corner_result # TODO: is_dtype_union_equal is a hack around # 1. buggy set ops with duplicates (GH #13432) @@ -2398,7 +2416,7 @@ def intersection(self, other): other = _ensure_index(other) if self.equals(other): - return self._get_setop_name_object(other) + return self._get_reconciled_name_object(other) if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') @@ -2436,6 +2454,13 @@ def intersection(self, other): taken.name = None return taken + def _create_empty_index(self, name): + """ + Returns an empty index. Overridden as necessary by + subclasses that have different constructors. + """ + return self.__class__([], name=name) + def difference(self, other): """ Return a new Index with elements from the index that are not in @@ -2464,7 +2489,7 @@ def difference(self, other): self._assert_can_do_setop(other) if self.equals(other): - return Index([], name=self.name) + return self._create_empty_index(get_op_result_name(self, other)) other, result_name = self._convert_can_do_setop(other) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 218851b1713f2..21700a507c5a0 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -25,6 +25,7 @@ import pandas.core.base as base import pandas.core.missing as missing import pandas.core.indexes.base as ibase +from pandas.core.ops import get_op_result_name _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update(dict(target_klass='CategoricalIndex')) @@ -300,6 +301,12 @@ def itemsize(self): # Size of the items in categories, not codes. return self.values.itemsize + def _wrap_setop_result(self, other, result): + name = get_op_result_name(self, other) + return self._simple_new(result, name=name, + categories=self.categories, + ordered=self.ordered) + def get_values(self): """ return the underlying data as an ndarray """ return self._data.get_values() @@ -716,6 +723,13 @@ def insert(self, loc, item): codes = np.concatenate((codes[:loc], code, codes[loc:])) return self._create_from_codes(codes) + def _create_empty_index(self, name): + """ + Returns an empty index using categories and ordered of self + """ + return CategoricalIndex([], categories=self.categories, + ordered=self.ordered, name=name) + def _concat(self, to_concat, name): # if calling index is category, don't check dtype of others return CategoricalIndex._concat_same_dtype(self, to_concat, name) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 19203f4142567..56675560d7828 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1137,6 +1137,11 @@ def union(self, other): y : Index or DatetimeIndex """ self._assert_can_do_setop(other) + + is_corner_case, corner_result = self._union_corner_case(other) + if is_corner_case: + return corner_result + if not isinstance(other, DatetimeIndex): try: other = DatetimeIndex(other) @@ -1354,6 +1359,10 @@ def intersection(self, other): y : Index or DatetimeIndex """ self._assert_can_do_setop(other) + + if self.equals(other): + return self._get_reconciled_name_object(other) + if not isinstance(other, DatetimeIndex): try: other = DatetimeIndex(other) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 73f4aee1c4880..bc64e1de9219c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2733,7 +2733,7 @@ def intersection(self, other): other_tuples = other._ndarray_values uniq_tuples = sorted(set(self_tuples) & set(other_tuples)) if len(uniq_tuples) == 0: - return MultiIndex(levels=[[]] * self.nlevels, + return MultiIndex(levels=self.levels, labels=[[]] * self.nlevels, names=result_names, verify_integrity=False) else: @@ -2755,7 +2755,7 @@ def difference(self, other): return self if self.equals(other): - return MultiIndex(levels=[[]] * self.nlevels, + return MultiIndex(levels=self.levels, labels=[[]] * self.nlevels, names=result_names, verify_integrity=False) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 4f4f222e4c137..7c51b39ee7ed5 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -963,6 +963,12 @@ def _convert_tolerance(self, tolerance, target): 'target index size') return self._maybe_convert_timedelta(tolerance) + def _create_empty_index(self, name): + """ + Returns an empty index using freq of self + """ + return PeriodIndex([], freq=self.freq, name=name) + def insert(self, loc, item): if not isinstance(item, Period) or self.freq != item.freq: return self.astype(object).insert(loc, item) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 9d770cffb0059..c308ef6d219a5 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -256,7 +256,8 @@ def tolist(self): @Appender(_index_shared_docs['_shallow_copy']) def _shallow_copy(self, values=None, **kwargs): if values is None: - return RangeIndex(name=self.name, fastpath=True, + name = kwargs.get("name", self.name) + return RangeIndex(name=name, fastpath=True, **dict(self._get_data_as_items())) else: kwargs.setdefault('name', self.name) @@ -337,6 +338,10 @@ def intersection(self, other): ------- intersection : Index """ + + if self.equals(other): + return self._get_reconciled_name_object(other) + if not isinstance(other, RangeIndex): return super(RangeIndex, self).intersection(other) @@ -417,10 +422,10 @@ def union(self, other): union : Index """ self._assert_can_do_setop(other) - if len(other) == 0 or self.equals(other): - return self - if len(self) == 0: - return other + is_corner_case, corner_result = self._union_corner_case(other) + if is_corner_case: + return corner_result + if isinstance(other, RangeIndex): start_s, step_s = self._start, self._step end_s = self._start + self._step * (len(self) - 1) @@ -474,6 +479,12 @@ def join(self, other, how='left', level=None, return_indexers=False, def _concat_same_dtype(self, indexes, name): return _concat._concat_rangeindex_same_dtype(indexes).rename(name) + def _create_empty_index(self, name): + """ + Returns an empty index using step size of self + """ + return RangeIndex(start=None, stop=None, step=self._step, name=name) + def __len__(self): """ return the length of the RangeIndex diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 9b3b179f94dc3..ade9737978a9b 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -546,6 +546,11 @@ def union(self, other): y : Index or TimedeltaIndex """ self._assert_can_do_setop(other) + + is_corner_case, corner_result = self._union_corner_case(other) + if is_corner_case: + return corner_result + if not isinstance(other, TimedeltaIndex): try: other = TimedeltaIndex(other) @@ -638,10 +643,6 @@ def _fast_union(self, other): else: return left - def _wrap_setop_result(self, other, result): - name = get_op_result_name(self, other) - return self._simple_new(result, name=name, freq=None) - def intersection(self, other): """ Specialized intersection for TimedeltaIndex objects. May be much faster @@ -656,6 +657,10 @@ def intersection(self, other): y : Index or TimedeltaIndex """ self._assert_can_do_setop(other) + + if self.equals(other): + return self._get_reconciled_name_object(other) + if not isinstance(other, TimedeltaIndex): try: other = TimedeltaIndex(other) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index afc8c78bfc996..22b4aab512f28 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -44,7 +44,7 @@ def setup_method(self, method): tdIndex=tm.makeTimedeltaIndex(100), intIndex=tm.makeIntIndex(100), uintIndex=tm.makeUIntIndex(100), - rangeIndex=tm.makeIntIndex(100), + rangeIndex=tm.makeRangeIndex(100), floatIndex=tm.makeFloatIndex(100), boolIndex=Index([True, False]), catIndex=tm.makeCategoricalIndex(100), @@ -707,6 +707,98 @@ def test_intersect_str_dates(self): assert len(res) == 0 + def create_empty_index(self, id): + """ + Given an index, create an empty index of the + same class. Use drop to create it + to avoid worrying about extra args in + constructor + """ + return id.drop(id) + + def test_corner_union(self): + # GH 9943 9862 + # Test unions with various name combinations + # Do not test MultiIndex + + skip_index_keys = ['tuples', 'repeats'] + for key, id in self.indices.items(): + if key not in skip_index_keys: + first = id.copy().set_names('A') + second = id.copy().set_names('A') + union = first.union(second) + expected = id.copy().set_names('A') + tm.assert_index_equal(union, expected) + + first = id.copy().set_names('A') + second = id.copy().set_names('B') + union = first.union(second) + expected = id.copy().set_names(None) + tm.assert_index_equal(union, expected) + + first = id.copy().set_names('A') + second = self.create_empty_index(id).set_names('B') + union = first.union(second) + expected = id.copy().set_names(None) + tm.assert_index_equal(union, expected) + + first = self.create_empty_index(id).set_names('A') + second = id.copy().set_names('B') + union = first.union(second) + expected = id.copy().set_names(None) + tm.assert_index_equal(union, expected) + + first = id.copy().set_names(None) + second = id.copy().set_names('B') + union = first.union(second) + expected = id.copy().set_names(None) + tm.assert_index_equal(union, expected) + + first = self.create_empty_index(id).set_names(None) + second = id.copy().set_names('B') + union = first.union(second) + expected = id.copy().set_names(None) + tm.assert_index_equal(union, expected) + + first = id.copy().set_names(None) + second = self.create_empty_index(id).set_names('B') + union = first.union(second) + expected = id.copy().set_names(None) + tm.assert_index_equal(union, expected) + + first = id.copy().set_names('A') + second = id.copy().set_names(None) + union = first.union(second) + expected = id.copy().set_names(None) + tm.assert_index_equal(union, expected) + + first = id.copy().set_names('A') + second = self.create_empty_index(id).set_names(None) + union = first.union(second) + expected = id.copy().set_names(None) + tm.assert_index_equal(union, expected) + + first = self.create_empty_index(id).set_names('A') + second = id.copy().set_names(None) + union = first.union(second) + expected = id.copy().set_names(None) + tm.assert_index_equal(union, expected) + + # Chained unions handles names correctly + i1 = Index([1, 2], name='i1') + i2 = Index([3, 4], name='i2') + i3 = Index([5, 6], name='i3') + union = i1.union(i2.union(i3)) + expected = i1.union(i2).union(i3) + tm.assert_index_equal(union, expected) + + j1 = Index([1, 2], name='j1') + j2 = Index([], name='j2') + j3 = Index([], name='j3') + union = j1.union(j2.union(j3)) + expected = j1.union(j2).union(j3) + tm.assert_index_equal(union, expected) + def test_union(self): first = self.strIndex[5:20] second = self.strIndex[:10] @@ -730,98 +822,6 @@ def test_union(self): union = Index([]).union(first) assert union is first - # preserve names only when they are the same - # GH 9943 9862 - - first = Index(list('ab'), name='A') - second = Index(list('abc'), name='A') - union = first.union(second) - expected = Index(list('abc'), name='A') - tm.assert_index_equal(union, expected) - - first = Index(list('ab'), name='A') - second = Index(list('ab'), name='B') - union = first.union(second) - expected = Index(list('ab'), name=None) - tm.assert_index_equal(union, expected) - - first = Index(list('ab'), name='A') - second = Index([], name='B') - union = first.union(second) - expected = Index(list('ab'), name=None) - tm.assert_index_equal(union, expected) - - first = Index([], name='A') - second = Index(list('ab'), name='B') - union = first.union(second) - expected = Index(list('ab'), name=None) - tm.assert_index_equal(union, expected) - - first = Index(list('ab')) - second = Index(list('ab'), name='B') - union = first.union(second) - expected = Index(list('ab'), name=None) - tm.assert_index_equal(union, expected) - - # GH 9943 9862 - first = Index(list('abc')) - second = Index(list('ab'), name='B') - union = first.union(second) - expected = Index(list('abc'), name=None) - tm.assert_index_equal(union, expected) - - first = Index([]) - second = Index(list('ab'), name='B') - union = first.union(second) - expected = Index(list('ab'), name=None) - tm.assert_index_equal(union, expected) - - first = Index(list('ab')) - second = Index([], name='B') - union = first.union(second) - expected = Index(list('ab'), name=None) - tm.assert_index_equal(union, expected) - - first = Index(list('ab'), name='A') - second = Index(list('ab')) - union = first.union(second) - expected = Index(list('ab'), name=None) - tm.assert_index_equal(union, expected) - - # GH 9943 9862 - first = Index(list('ab'), name='A') - second = Index(list('abc')) - union = first.union(second) - expected = Index(list('abc'), name=None) - tm.assert_index_equal(union, expected) - - first = Index(list('ab'), name='A') - second = Index([]) - union = first.union(second) - expected = Index(list('ab'), name=None) - tm.assert_index_equal(union, expected) - - first = Index([], name='A') - second = Index(list('ab')) - union = first.union(second) - expected = Index(list('ab'), name=None) - tm.assert_index_equal(union, expected) - - # Chained unions handles names correctly - i1 = Index([1, 2], name='i1') - i2 = Index([3, 4], name='i2') - i3 = Index([5, 6], name='i3') - union = i1.union(i2.union(i3)) - expected = i1.union(i2).union(i3) - tm.assert_index_equal(union, expected) - - j1 = Index([1, 2], name='j1') - j2 = Index([], name='j2') - j3 = Index([], name='j3') - union = j1.union(j2.union(j3)) - expected = j1.union(j2).union(j3) - tm.assert_index_equal(union, expected) - with tm.assert_produces_warning(RuntimeWarning): firstCat = self.strIndex.union(self.dateIndex) secondCat = self.strIndex.union(self.strIndex) @@ -997,6 +997,17 @@ def test_iadd_string(self): index += '_x' assert 'a_x' in index + def test_difference_type(self): + # GH 9943 9862 + # If taking difference of a set and itself, it + # needs to preserve the type of the index + skip_index_keys = ['repeats'] + for key, id in self.indices.items(): + if key not in skip_index_keys: + result = id.difference(id) + expected = self.create_empty_index(id) + tm.assert_index_equal(result, expected) + def test_difference(self): first = self.strIndex[5:20] @@ -1072,6 +1083,17 @@ def test_symmetric_difference(self): assert tm.equalContents(result, expected) assert result.name == 'new_name' + def test_intersection_difference(self): + # Test that the intersection of an index with an + # empty index produces the same index as the difference + # of an index with itself. Test for all types + skip_index_keys = ['repeats'] + for key, id in self.indices.items(): + if key not in skip_index_keys: + inter = id.intersection(self.create_empty_index(id)) + diff = id.difference(id) + tm.assert_index_equal(inter, diff) + def test_is_numeric(self): assert not self.dateIndex.is_numeric() assert not self.strIndex.is_numeric() diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 437b4179c580a..02d118d49ffe9 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2055,10 +2055,10 @@ def test_concat_categoricalindex(self): result = pd.concat([a, b, c], axis=1) - exp_idx = pd.CategoricalIndex([0, 1, 2, 9]) - exp = pd.DataFrame({0: [1, np.nan, np.nan, 1], - 1: [2, 2, np.nan, np.nan], - 2: [np.nan, 3, 3, np.nan]}, + exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories) + exp = pd.DataFrame({0: [1, 1, np.nan, np.nan], + 1: [np.nan, 2, 2, np.nan], + 2: [np.nan, np.nan, 3, 3]}, columns=[0, 1, 2], index=exp_idx) tm.assert_frame_equal(result, exp) From ba9ccdd7651812090dfd6c3cee0392378b68cd07 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Mon, 5 Mar 2018 13:42:04 -0500 Subject: [PATCH 03/12] Updated whatsnew --- doc/source/whatsnew/v0.23.0.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 50a1a9f62c59b..346a0648130bd 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -837,8 +837,8 @@ Indexing - Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`) - Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`) - Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (issue:`19726`) -- Bug in :func:`Index.union` and :func:`Index.intersection` where name of the `Index` of the result was not computed correctly for certain cases (:issue:`9943`, :issue:`9862`) -- Bug in :func:`Index.difference` when taking difference of set and itself as type was not preserved. +- Bug in :func:`Index.union` and :func:`Index.intersection` where name of the ``Index`` of the result was not computed correctly for certain cases (:issue:`9943`, :issue:`9862`) +- Bug in :func:`Index.difference` when taking difference of ``Index`` and itself as type was not preserved. (:issue:`19849`) MultiIndex From 38aeacbae4273e0928012b6cf6de57cdc7da89b4 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Wed, 7 Mar 2018 10:25:20 -0500 Subject: [PATCH 04/12] Remove set difference fix --- doc/source/whatsnew/v0.23.0.txt | 1 - pandas/core/indexes/base.py | 9 +-------- pandas/core/indexes/category.py | 7 ------- pandas/core/indexes/multi.py | 4 ++-- pandas/core/indexes/period.py | 6 ------ pandas/core/indexes/range.py | 6 ------ pandas/tests/indexes/test_base.py | 11 ----------- pandas/tests/reshape/test_concat.py | 8 ++++---- 8 files changed, 7 insertions(+), 45 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 346a0648130bd..2df3c0fc8a8fe 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -838,7 +838,6 @@ Indexing - Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`) - Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (issue:`19726`) - Bug in :func:`Index.union` and :func:`Index.intersection` where name of the ``Index`` of the result was not computed correctly for certain cases (:issue:`9943`, :issue:`9862`) -- Bug in :func:`Index.difference` when taking difference of ``Index`` and itself as type was not preserved. (:issue:`19849`) MultiIndex diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ab0f3665db9df..a1cf02de36019 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2454,13 +2454,6 @@ def intersection(self, other): taken.name = None return taken - def _create_empty_index(self, name): - """ - Returns an empty index. Overridden as necessary by - subclasses that have different constructors. - """ - return self.__class__([], name=name) - def difference(self, other): """ Return a new Index with elements from the index that are not in @@ -2489,7 +2482,7 @@ def difference(self, other): self._assert_can_do_setop(other) if self.equals(other): - return self._create_empty_index(get_op_result_name(self, other)) + return Index([], name=self.name) other, result_name = self._convert_can_do_setop(other) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 21700a507c5a0..8f877be7d6f9f 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -723,13 +723,6 @@ def insert(self, loc, item): codes = np.concatenate((codes[:loc], code, codes[loc:])) return self._create_from_codes(codes) - def _create_empty_index(self, name): - """ - Returns an empty index using categories and ordered of self - """ - return CategoricalIndex([], categories=self.categories, - ordered=self.ordered, name=name) - def _concat(self, to_concat, name): # if calling index is category, don't check dtype of others return CategoricalIndex._concat_same_dtype(self, to_concat, name) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index bc64e1de9219c..73f4aee1c4880 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2733,7 +2733,7 @@ def intersection(self, other): other_tuples = other._ndarray_values uniq_tuples = sorted(set(self_tuples) & set(other_tuples)) if len(uniq_tuples) == 0: - return MultiIndex(levels=self.levels, + return MultiIndex(levels=[[]] * self.nlevels, labels=[[]] * self.nlevels, names=result_names, verify_integrity=False) else: @@ -2755,7 +2755,7 @@ def difference(self, other): return self if self.equals(other): - return MultiIndex(levels=self.levels, + return MultiIndex(levels=[[]] * self.nlevels, labels=[[]] * self.nlevels, names=result_names, verify_integrity=False) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 7c51b39ee7ed5..4f4f222e4c137 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -963,12 +963,6 @@ def _convert_tolerance(self, tolerance, target): 'target index size') return self._maybe_convert_timedelta(tolerance) - def _create_empty_index(self, name): - """ - Returns an empty index using freq of self - """ - return PeriodIndex([], freq=self.freq, name=name) - def insert(self, loc, item): if not isinstance(item, Period) or self.freq != item.freq: return self.astype(object).insert(loc, item) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index c308ef6d219a5..8bca144780376 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -479,12 +479,6 @@ def join(self, other, how='left', level=None, return_indexers=False, def _concat_same_dtype(self, indexes, name): return _concat._concat_rangeindex_same_dtype(indexes).rename(name) - def _create_empty_index(self, name): - """ - Returns an empty index using step size of self - """ - return RangeIndex(start=None, stop=None, step=self._step, name=name) - def __len__(self): """ return the length of the RangeIndex diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 22b4aab512f28..41f22dedf108b 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -997,17 +997,6 @@ def test_iadd_string(self): index += '_x' assert 'a_x' in index - def test_difference_type(self): - # GH 9943 9862 - # If taking difference of a set and itself, it - # needs to preserve the type of the index - skip_index_keys = ['repeats'] - for key, id in self.indices.items(): - if key not in skip_index_keys: - result = id.difference(id) - expected = self.create_empty_index(id) - tm.assert_index_equal(result, expected) - def test_difference(self): first = self.strIndex[5:20] diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 02d118d49ffe9..437b4179c580a 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2055,10 +2055,10 @@ def test_concat_categoricalindex(self): result = pd.concat([a, b, c], axis=1) - exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories) - exp = pd.DataFrame({0: [1, 1, np.nan, np.nan], - 1: [np.nan, 2, 2, np.nan], - 2: [np.nan, np.nan, 3, 3]}, + exp_idx = pd.CategoricalIndex([0, 1, 2, 9]) + exp = pd.DataFrame({0: [1, np.nan, np.nan, 1], + 1: [2, 2, np.nan, np.nan], + 2: [np.nan, 3, 3, np.nan]}, columns=[0, 1, 2], index=exp_idx) tm.assert_frame_equal(result, exp) From fb3a9db0e47bacc2a0e46c81d037cd2819645d75 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Wed, 7 Mar 2018 11:28:16 -0500 Subject: [PATCH 05/12] Only include changes relative to name preservation --- pandas/tests/indexes/test_base.py | 11 ----------- pandas/tests/reshape/test_concat.py | 8 ++++---- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 41f22dedf108b..fda489af65f51 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1072,17 +1072,6 @@ def test_symmetric_difference(self): assert tm.equalContents(result, expected) assert result.name == 'new_name' - def test_intersection_difference(self): - # Test that the intersection of an index with an - # empty index produces the same index as the difference - # of an index with itself. Test for all types - skip_index_keys = ['repeats'] - for key, id in self.indices.items(): - if key not in skip_index_keys: - inter = id.intersection(self.create_empty_index(id)) - diff = id.difference(id) - tm.assert_index_equal(inter, diff) - def test_is_numeric(self): assert not self.dateIndex.is_numeric() assert not self.strIndex.is_numeric() diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 437b4179c580a..02d118d49ffe9 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2055,10 +2055,10 @@ def test_concat_categoricalindex(self): result = pd.concat([a, b, c], axis=1) - exp_idx = pd.CategoricalIndex([0, 1, 2, 9]) - exp = pd.DataFrame({0: [1, np.nan, np.nan, 1], - 1: [2, 2, np.nan, np.nan], - 2: [np.nan, 3, 3, np.nan]}, + exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories) + exp = pd.DataFrame({0: [1, 1, np.nan, np.nan], + 1: [np.nan, 2, 2, np.nan], + 2: [np.nan, np.nan, 3, 3]}, columns=[0, 1, 2], index=exp_idx) tm.assert_frame_equal(result, exp) From f72377a977b39f1f1078f7f453be97e054847bb1 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Wed, 7 Mar 2018 16:17:42 -0500 Subject: [PATCH 06/12] Use parametrize in test --- pandas/tests/indexes/test_base.py | 106 +++++++++++------------------- 1 file changed, 39 insertions(+), 67 deletions(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index fda489af65f51..05d80a505ff68 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -707,16 +707,30 @@ def test_intersect_str_dates(self): assert len(res) == 0 - def create_empty_index(self, id): - """ - Given an index, create an empty index of the - same class. Use drop to create it - to avoid worrying about extra args in - constructor - """ - return id.drop(id) - - def test_corner_union(self): + @pytest.mark.parametrize( + 'ftype, fname, stype, sname, expected_name', + [ + ('copy', 'A', 'copy', 'A', 'A'), + ('copy', 'A', 'copy', 'B', None), + ('copy', 'A', 'copy', None, None), + ('copy', 'A', 'empty', 'A', 'A'), + ('copy', 'A', 'empty', 'B', None), + ('copy', 'A', 'empty', None, None), + ('copy', None, 'copy', 'B', None), + ('copy', None, 'copy', None, None), + ('copy', None, 'empty', 'B', None), + ('copy', None, 'empty', None, None), + ('empty', 'A', 'copy', 'A', 'A'), + ('empty', 'A', 'copy', 'B', None), + ('empty', 'A', 'copy', None, None), + ('empty', 'A', 'empty', 'A', 'A'), + ('empty', 'A', 'empty', 'B', None), + ('empty', 'A', 'empty', None, None), + ('empty', None, 'empty', 'B', None), + ('empty', None, 'empty', None, None) + ]) + def test_corner_union(self, ftype, fname, stype, + sname, expected_name): # GH 9943 9862 # Test unions with various name combinations # Do not test MultiIndex @@ -724,66 +738,24 @@ def test_corner_union(self): skip_index_keys = ['tuples', 'repeats'] for key, id in self.indices.items(): if key not in skip_index_keys: - first = id.copy().set_names('A') - second = id.copy().set_names('A') - union = first.union(second) - expected = id.copy().set_names('A') - tm.assert_index_equal(union, expected) - - first = id.copy().set_names('A') - second = id.copy().set_names('B') - union = first.union(second) - expected = id.copy().set_names(None) - tm.assert_index_equal(union, expected) - - first = id.copy().set_names('A') - second = self.create_empty_index(id).set_names('B') - union = first.union(second) - expected = id.copy().set_names(None) - tm.assert_index_equal(union, expected) - - first = self.create_empty_index(id).set_names('A') - second = id.copy().set_names('B') - union = first.union(second) - expected = id.copy().set_names(None) - tm.assert_index_equal(union, expected) - - first = id.copy().set_names(None) - second = id.copy().set_names('B') - union = first.union(second) - expected = id.copy().set_names(None) - tm.assert_index_equal(union, expected) - - first = self.create_empty_index(id).set_names(None) - second = id.copy().set_names('B') - union = first.union(second) - expected = id.copy().set_names(None) - tm.assert_index_equal(union, expected) - - first = id.copy().set_names(None) - second = self.create_empty_index(id).set_names('B') - union = first.union(second) - expected = id.copy().set_names(None) - tm.assert_index_equal(union, expected) - - first = id.copy().set_names('A') - second = id.copy().set_names(None) - union = first.union(second) - expected = id.copy().set_names(None) - tm.assert_index_equal(union, expected) - - first = id.copy().set_names('A') - second = self.create_empty_index(id).set_names(None) - union = first.union(second) - expected = id.copy().set_names(None) - tm.assert_index_equal(union, expected) - - first = self.create_empty_index(id).set_names('A') - second = id.copy().set_names(None) + if ftype == 'copy': + first = id.copy() + else: + first = id.drop(id) + first = first.set_names(fname) + if stype == 'copy': + second = id.copy() + else: + second = id.drop(id) + second = second.set_names(sname) union = first.union(second) - expected = id.copy().set_names(None) + if (ftype == 'empty') and (stype == 'empty'): + expected = id.drop(id).set_names(expected_name) + else: + expected = id.copy().set_names(expected_name) tm.assert_index_equal(union, expected) + def test_chained_union(self): # Chained unions handles names correctly i1 = Index([1, 2], name='i1') i2 = Index([3, 4], name='i2') From 08eb29174471a343875e342fd902a97b28d35f9a Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Thu, 8 Mar 2018 14:14:34 -0500 Subject: [PATCH 07/12] remove is_corner_union and change parametrize on test --- pandas/core/indexes/base.py | 28 +++--------- pandas/core/indexes/category.py | 4 +- pandas/core/indexes/datetimes.py | 7 ++- pandas/core/indexes/range.py | 5 +-- pandas/core/indexes/timedeltas.py | 5 +-- pandas/tests/indexes/test_base.py | 75 +++++++++++++++---------------- 6 files changed, 50 insertions(+), 74 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a1cf02de36019..75532557b820f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2274,26 +2274,6 @@ def _get_reconciled_name_object(self, other): return self._shallow_copy(name=name) return self - def _union_corner_case(self, other): - """ - If self or other have no length, or self and other - are the same, then return self, after reconciling names - - Returns - ------- - Tuple of (is_corner, result), where is_corner is True if - it is a corner case, and result is the reconciled result - - """ - # GH 9943 9862 - if len(other) == 0 or self.equals(other): - return (True, self._get_reconciled_name_object(other)) - - if len(self) == 0: - return (True, other._get_reconciled_name_object(self)) - - return (False, None) - def union(self, other): """ Form the union of two Index objects and sorts if possible. @@ -2318,9 +2298,11 @@ def union(self, other): self._assert_can_do_setop(other) other = _ensure_index(other) - is_corner_case, corner_result = self._union_corner_case(other) - if is_corner_case: - return corner_result + if len(other) == 0 or self.equals(other): + return self._get_reconciled_name_object(other) + + if len(self) == 0: + return other._get_reconciled_name_object(self) # TODO: is_dtype_union_equal is a hack around # 1. buggy set ops with duplicates (GH #13432) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 8f877be7d6f9f..1220376e56557 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -303,9 +303,7 @@ def itemsize(self): def _wrap_setop_result(self, other, result): name = get_op_result_name(self, other) - return self._simple_new(result, name=name, - categories=self.categories, - ordered=self.ordered) + return self._shallow_copy(result, name=name) def get_values(self): """ return the underlying data as an ndarray """ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 56675560d7828..5732aed4b7983 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1138,9 +1138,8 @@ def union(self, other): """ self._assert_can_do_setop(other) - is_corner_case, corner_result = self._union_corner_case(other) - if is_corner_case: - return corner_result + if len(other) == 0 or self.equals(other) or len(self) == 0: + return super(DatetimeIndex, self).union(other) if not isinstance(other, DatetimeIndex): try: @@ -1343,7 +1342,7 @@ def _wrap_setop_result(self, other, result): name = get_op_result_name(self, other) if not timezones.tz_compare(self.tz, other.tz): raise ValueError('Passed item and index have different timezone') - return self._simple_new(result, name=name, freq=None, tz=self.tz) + return self._shallow_copy(result, name=name, freq=None, tz=self.tz) def intersection(self, other): """ diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 8bca144780376..ff30f32309bf0 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -422,9 +422,8 @@ def union(self, other): union : Index """ self._assert_can_do_setop(other) - is_corner_case, corner_result = self._union_corner_case(other) - if is_corner_case: - return corner_result + if len(other) == 0 or self.equals(other) or len(self) == 0: + return super(RangeIndex, self).union(other) if isinstance(other, RangeIndex): start_s, step_s = self._start, self._step diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index ade9737978a9b..5899ca300be5f 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -547,9 +547,8 @@ def union(self, other): """ self._assert_can_do_setop(other) - is_corner_case, corner_result = self._union_corner_case(other) - if is_corner_case: - return corner_result + if len(other) == 0 or self.equals(other) or len(self) == 0: + return super(TimedeltaIndex, self).union(other) if not isinstance(other, TimedeltaIndex): try: diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 05d80a505ff68..baa32d79cfe2c 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -708,52 +708,51 @@ def test_intersect_str_dates(self): assert len(res) == 0 @pytest.mark.parametrize( - 'ftype, fname, stype, sname, expected_name', + 'fname, sname, expected_name', [ - ('copy', 'A', 'copy', 'A', 'A'), - ('copy', 'A', 'copy', 'B', None), - ('copy', 'A', 'copy', None, None), - ('copy', 'A', 'empty', 'A', 'A'), - ('copy', 'A', 'empty', 'B', None), - ('copy', 'A', 'empty', None, None), - ('copy', None, 'copy', 'B', None), - ('copy', None, 'copy', None, None), - ('copy', None, 'empty', 'B', None), - ('copy', None, 'empty', None, None), - ('empty', 'A', 'copy', 'A', 'A'), - ('empty', 'A', 'copy', 'B', None), - ('empty', 'A', 'copy', None, None), - ('empty', 'A', 'empty', 'A', 'A'), - ('empty', 'A', 'empty', 'B', None), - ('empty', 'A', 'empty', None, None), - ('empty', None, 'empty', 'B', None), - ('empty', None, 'empty', None, None) + ('A', 'A', 'A'), + ('A', 'B', None), + ('A', None, None), + (None, 'B', None), + (None, None, None), ]) - def test_corner_union(self, ftype, fname, stype, - sname, expected_name): + def test_corner_union(self, fname, sname, expected_name): # GH 9943 9862 # Test unions with various name combinations # Do not test MultiIndex skip_index_keys = ['tuples', 'repeats'] for key, id in self.indices.items(): - if key not in skip_index_keys: - if ftype == 'copy': - first = id.copy() - else: - first = id.drop(id) - first = first.set_names(fname) - if stype == 'copy': - second = id.copy() - else: - second = id.drop(id) - second = second.set_names(sname) - union = first.union(second) - if (ftype == 'empty') and (stype == 'empty'): - expected = id.drop(id).set_names(expected_name) - else: - expected = id.copy().set_names(expected_name) - tm.assert_index_equal(union, expected) + if key in skip_index_keys: + continue + + # Test copy.union(copy) + first = id.copy().set_names(fname) + second = id.copy().set_names(sname) + union = first.union(second) + expected = id.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test copy.union(empty) + first = id.copy().set_names(fname) + second = id.drop(id).set_names(sname) + union = first.union(second) + expected = id.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test empty.union(copy) + first = id.drop(id).set_names(fname) + second = id.copy().set_names(sname) + union = first.union(second) + expected = id.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test empty.union(empty) + first = id.drop(id).set_names(fname) + second = id.drop(id).set_names(sname) + union = first.union(second) + expected = id.drop(id).set_names(expected_name) + tm.assert_index_equal(union, expected) def test_chained_union(self): # Chained unions handles names correctly From a48f734843022c5dc652790a333b38ac8dc8a8b5 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Mon, 9 Jul 2018 15:57:56 -0400 Subject: [PATCH 08/12] Update for current master --- pandas/tests/indexes/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 7dc34fbd0e1cd..d7c9109102d68 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -885,7 +885,7 @@ def test_union_identity(self): @pytest.mark.parametrize("first_list", [list('ab'), list()]) @pytest.mark.parametrize("second_list", [list('ab'), list()]) @pytest.mark.parametrize("first_name, second_name, expected_name", [ - ('A', 'B', None), (None, 'B', 'B'), ('A', None, 'A')]) + ('A', 'B', None), (None, 'B', None), ('A', None, None)]) def test_union_name_preservation(self, first_list, second_list, first_name, second_name, expected_name): first = Index(first_list, name=first_name) From b977e528b66bf5b4f345cbb7ccdcb740435115b5 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Tue, 10 Jul 2018 10:27:46 -0400 Subject: [PATCH 09/12] Use fixtures in testing --- doc/source/whatsnew/v0.24.0.txt | 3 +- pandas/core/indexes/base.py | 2 +- pandas/tests/indexes/conftest.py | 1 + pandas/tests/indexes/test_base.py | 73 +++++++++++++++++-------------- 4 files changed, 42 insertions(+), 37 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 09fe8492daefc..24fc0b9b92c72 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -411,7 +411,7 @@ Reshaping - Bug in :func:`pandas.concat` when joining resampled DataFrames with timezone aware index (:issue:`13783`) - Bug in :meth:`Series.combine_first` with ``datetime64[ns, tz]`` dtype which would return tz-naive result (:issue:`21469`) - Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``datetime64[ns, tz]`` dtype (:issue:`21546`) -- +- Bug in :func:`Index.union` and :func:`Index.intersection` where name of the ``Index`` of the result was not computed correctly for certain cases (:issue:`9943`, :issue:`9862`) - Build Changes @@ -425,6 +425,5 @@ Other - :meth: `~pandas.io.formats.style.Styler.background_gradient` now takes a ``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. This improves readability with dark background colors without the need to limit the background colormap range. (:issue:`21258`) - Require at least 0.28.2 version of ``cython`` to support read-only memoryviews (:issue:`21688`) -- Bug in :func:`Index.union` and :func:`Index.intersection` where name of the ``Index`` of the result was not computed correctly for certain cases (:issue:`9943`, :issue:`9862`) - - diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7e2e6ab077d36..52b277d6846ad 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2748,7 +2748,7 @@ def union(self, other): return self._wrap_setop_result(other, result) def _wrap_setop_result(self, other, result): - return self.__class__(result, name=get_op_result_name(self, other)) + return self._constructor(result, name=get_op_result_name(self, other)) def intersection(self, other): """ diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index 6d88ef0cfa6c5..b7f9f8dc8aae2 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -14,6 +14,7 @@ tm.makeTimedeltaIndex(100), tm.makeIntIndex(100), tm.makeUIntIndex(100), + tm.makeRangeIndex(100), tm.makeFloatIndex(100), Index([True, False]), tm.makeCategoricalIndex(100), diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index d7c9109102d68..b3bdb31946363 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -713,7 +713,10 @@ def test_empty_fancy_raises(self, attr): @pytest.mark.parametrize("itm", [101, 'no_int']) def test_getitem_error(self, indices, itm): - with pytest.raises(IndexError): + error_type = IndexError + if isinstance(indices, RangeIndex) and (itm == 'no_int'): + error_type = ValueError + with pytest.raises(error_type): indices[itm] def test_intersection(self): @@ -793,43 +796,41 @@ def test_intersect_str_dates(self): (None, 'B', None), (None, None, None), ]) - def test_corner_union(self, fname, sname, expected_name): + def test_corner_union(self, indices, fname, sname, expected_name): # GH 9943 9862 # Test unions with various name combinations - # Do not test MultiIndex + # Do not test MultiIndex or repeats - skip_index_keys = ['tuples', 'repeats'] - for key, id in self.indices.items(): - if key in skip_index_keys: - continue + if isinstance(indices, MultiIndex) or not indices.is_unique: + pytest.skip("Not for MultiIndex or repeated indices") + + # Test copy.union(copy) + first = indices.copy().set_names(fname) + second = indices.copy().set_names(sname) + union = first.union(second) + expected = indices.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test copy.union(empty) + first = indices.copy().set_names(fname) + second = indices.drop(indices).set_names(sname) + union = first.union(second) + expected = indices.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test empty.union(copy) + first = indices.drop(indices).set_names(fname) + second = indices.copy().set_names(sname) + union = first.union(second) + expected = indices.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) - # Test copy.union(copy) - first = id.copy().set_names(fname) - second = id.copy().set_names(sname) - union = first.union(second) - expected = id.copy().set_names(expected_name) - tm.assert_index_equal(union, expected) - - # Test copy.union(empty) - first = id.copy().set_names(fname) - second = id.drop(id).set_names(sname) - union = first.union(second) - expected = id.copy().set_names(expected_name) - tm.assert_index_equal(union, expected) - - # Test empty.union(copy) - first = id.drop(id).set_names(fname) - second = id.copy().set_names(sname) - union = first.union(second) - expected = id.copy().set_names(expected_name) - tm.assert_index_equal(union, expected) - - # Test empty.union(empty) - first = id.drop(id).set_names(fname) - second = id.drop(id).set_names(sname) - union = first.union(second) - expected = id.drop(id).set_names(expected_name) - tm.assert_index_equal(union, expected) + # Test empty.union(empty) + first = indices.drop(indices).set_names(fname) + second = indices.drop(indices).set_names(sname) + union = first.union(second) + expected = indices.drop(indices).set_names(expected_name) + tm.assert_index_equal(union, expected) def test_chained_union(self): # Chained unions handles names correctly @@ -2533,6 +2534,10 @@ def test_generated_op_names(opname, indices): # pd.Index.__rsub__ does not exist; though the method does exist # for subclasses. see GH#19723 return + if (isinstance(index, RangeIndex) and + opname in ['add', 'radd', 'sub', 'rsub', + 'mul', 'rmul', 'truediv', 'rtruediv']): + pytest.skip("RangeIndex does operators differently") opname = '__{name}__'.format(name=opname) method = getattr(index, opname) assert method.__name__ == opname From d1620f66cce778d86bdbed1de1eb2f14e795ac59 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Thu, 1 Nov 2018 12:31:20 -0400 Subject: [PATCH 10/12] Merge with latest master --- pandas/tests/indexes/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 468b1610a9142..c5cbaea23df76 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -360,10 +360,10 @@ def test_has_duplicates(self, indices): def test_duplicated(self, indices, keep): if type(indices) is not self._holder: pytest.skip('Can only check if we know the index type') - if not len(indices) or isinstance(indices, MultiIndex): + if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)): # MultiIndex tested separately in: # tests/indexes/multi/test_unique_and_duplicates - pytest.skip('Skip check for empty Index and MultiIndex') + pytest.skip('Skip check for empty Index, MultiIndex, RangeIndex') idx = self._holder(indices) if idx.has_duplicates: From 893efb89d93ea6f3369f2263ef6ef192fae33752 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Mon, 5 Nov 2018 15:02:53 -0500 Subject: [PATCH 11/12] Fix RangeIndex implementation to avoid test changes --- pandas/core/indexes/range.py | 11 +++++++++-- pandas/tests/indexes/test_base.py | 9 +-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index d215248b27205..156a6d9f6b35f 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -6,6 +6,7 @@ import numpy as np from pandas._libs import index as libindex +from pandas._libs import lib import pandas.compat as compat from pandas.compat import get_range_parameters, lrange, range from pandas.compat.numpy import function as nv @@ -502,7 +503,12 @@ def __getitem__(self, key): super_getitem = super(RangeIndex, self).__getitem__ if is_scalar(key): - n = int(key) + if not lib.is_integer(key): + raise IndexError("only integers, slices (`:`), " + "ellipsis (`...`), numpy.newaxis (`None`) " + "and integer or boolean " + "arrays are valid indices") + n = com.cast_scalar_indexer(key) if n != key: return super_getitem(key) if n < 0: @@ -653,7 +659,8 @@ def _evaluate_numeric_binop(self, other): return op(self._int64index, other) # TODO: Do attrs get handled reliably? - return _evaluate_numeric_binop + name = '__{name}__'.format(name=op.__name__) + return compat.set_function_name(_evaluate_numeric_binop, name, cls) cls.__add__ = _make_evaluate_binop(operator.add) cls.__radd__ = _make_evaluate_binop(ops.radd) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 4245b1c0637e4..724dffc49dd3b 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -715,10 +715,7 @@ def test_empty_fancy_raises(self, attr): # FutureWarning from non-tuple sequence of nd indexing @pytest.mark.filterwarnings("ignore::FutureWarning") def test_getitem_error(self, indices, itm): - error_type = IndexError - if isinstance(indices, RangeIndex) and (itm == 'no_int'): - error_type = ValueError - with pytest.raises(error_type): + with pytest.raises(IndexError): indices[itm] def test_intersection(self): @@ -2581,10 +2578,6 @@ def test_generated_op_names(opname, indices): # pd.Index.__rsub__ does not exist; though the method does exist # for subclasses. see GH#19723 return - if (isinstance(index, RangeIndex) and - opname in ['add', 'radd', 'sub', 'rsub', - 'mul', 'rmul', 'truediv', 'rtruediv']): - pytest.skip("RangeIndex does operators differently") opname = '__{name}__'.format(name=opname) method = getattr(index, opname) assert method.__name__ == opname From fa7311ad48b2af262f403db824836bf87f126ffd Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Tue, 6 Nov 2018 08:10:35 -0500 Subject: [PATCH 12/12] fix isort on range.py --- pandas/core/indexes/range.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 156a6d9f6b35f..d1b5645928921 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -5,8 +5,7 @@ import numpy as np -from pandas._libs import index as libindex -from pandas._libs import lib +from pandas._libs import index as libindex, lib import pandas.compat as compat from pandas.compat import get_range_parameters, lrange, range from pandas.compat.numpy import function as nv