-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: names on union and intersection for Index were inconsistent (GH9943 GH9862) #19849
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
aad5884
3ef2a69
ba9ccdd
38aeacb
fb3a9db
f72377a
08eb291
396088f
41cd000
a48f734
b977e52
a030ede
9ba6960
d1620f6
893efb8
fd7f23f
2d1c366
fa7311a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,7 +57,7 @@ | |
import pandas.core.algorithms as algos | ||
import pandas.core.sorting as sorting | ||
from pandas.io.formats.printing import pprint_thing | ||
from pandas.core.ops import make_invalid_op | ||
from pandas.core.ops import make_invalid_op, get_op_result_name | ||
from pandas.core.config import get_option | ||
from pandas.core.strings import StringMethods | ||
|
||
|
@@ -1191,7 +1191,7 @@ def _convert_can_do_setop(self, other): | |
other = Index(other, name=self.name) | ||
result_name = self.name | ||
else: | ||
result_name = self.name if self.name == other.name else None | ||
result_name = get_op_result_name(self, other) | ||
return other, result_name | ||
|
||
def _convert_for_op(self, value): | ||
|
@@ -1240,9 +1240,9 @@ def set_names(self, names, level=None, inplace=False): | |
Examples | ||
-------- | ||
>>> Index([1, 2, 3, 4]).set_names('foo') | ||
Int64Index([1, 2, 3, 4], dtype='int64') | ||
Int64Index([1, 2, 3, 4], dtype='int64', name='foo') | ||
>>> Index([1, 2, 3, 4]).set_names(['foo']) | ||
Int64Index([1, 2, 3, 4], dtype='int64') | ||
Int64Index([1, 2, 3, 4], dtype='int64', name='foo') | ||
>>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'), | ||
(2, u'one'), (2, u'two')], | ||
names=['foo', 'bar']) | ||
|
@@ -2263,21 +2263,37 @@ def __or__(self, other): | |
def __xor__(self, other): | ||
return self.symmetric_difference(other) | ||
|
||
def _get_consensus_name(self, other): | ||
def _get_reconciled_name_object(self, other): | ||
""" | ||
Given 2 indexes, give a consensus name meaning | ||
we take the not None one, or None if the names differ. | ||
Return a new object if we are resetting the name | ||
If the result of a set operation will be self, | ||
return self, unless the name changes, in which | ||
case make a shallow copy of self. | ||
""" | ||
if self.name != other.name: | ||
if self.name is None or other.name is None: | ||
name = self.name or other.name | ||
else: | ||
name = None | ||
if self.name != name: | ||
return self._shallow_copy(name=name) | ||
name = get_op_result_name(self, other) | ||
if self.name != name: | ||
return self._shallow_copy(name=name) | ||
return self | ||
|
||
def _union_corner_case(self, other): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is there a reason you are creatijng this as a separate method? why not inline it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 for inlining it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jreback @TomAugspurger The subclasses for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok this PR looks a lot better. I still don't like this hlper function. can we refactor to not use it |
||
""" | ||
If self or other have no length, or self and other | ||
are the same, then return self, after reconciling names | ||
|
||
Returns | ||
------- | ||
Tuple of (is_corner, result), where is_corner is True if | ||
it is a corner case, and result is the reconciled result | ||
|
||
""" | ||
# GH 9943 9862 | ||
if len(other) == 0 or self.equals(other): | ||
return (True, self._get_reconciled_name_object(other)) | ||
|
||
if len(self) == 0: | ||
return (True, other._get_reconciled_name_object(self)) | ||
|
||
return (False, None) | ||
|
||
def union(self, other): | ||
""" | ||
Form the union of two Index objects and sorts if possible. | ||
|
@@ -2302,11 +2318,9 @@ def union(self, other): | |
self._assert_can_do_setop(other) | ||
other = _ensure_index(other) | ||
|
||
if len(other) == 0 or self.equals(other): | ||
return self._get_consensus_name(other) | ||
|
||
if len(self) == 0: | ||
return other._get_consensus_name(self) | ||
is_corner_case, corner_result = self._union_corner_case(other) | ||
if is_corner_case: | ||
return corner_result | ||
|
||
# TODO: is_dtype_union_equal is a hack around | ||
# 1. buggy set ops with duplicates (GH #13432) | ||
|
@@ -2369,11 +2383,10 @@ def union(self, other): | |
stacklevel=3) | ||
|
||
# for subclasses | ||
return self._wrap_union_result(other, result) | ||
return self._wrap_setop_result(other, result) | ||
|
||
def _wrap_union_result(self, other, result): | ||
name = self.name if self.name == other.name else None | ||
return self.__class__(result, name=name) | ||
def _wrap_setop_result(self, other, result): | ||
return self.__class__(result, name=get_op_result_name(self, other)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jreback I've done some testing with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thats fine There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we consistently use can you use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is no other usage of We had discussion 4 months ago about why |
||
|
||
def intersection(self, other): | ||
""" | ||
|
@@ -2403,7 +2416,7 @@ def intersection(self, other): | |
other = _ensure_index(other) | ||
|
||
if self.equals(other): | ||
return self._get_consensus_name(other) | ||
return self._get_reconciled_name_object(other) | ||
|
||
if not is_dtype_equal(self.dtype, other.dtype): | ||
this = self.astype('O') | ||
|
@@ -2423,7 +2436,7 @@ def intersection(self, other): | |
if self.is_monotonic and other.is_monotonic: | ||
try: | ||
result = self._inner_indexer(lvals, rvals)[0] | ||
return self._wrap_union_result(other, result) | ||
return self._wrap_setop_result(other, result) | ||
except TypeError: | ||
pass | ||
|
||
|
@@ -3552,7 +3565,7 @@ def _join_monotonic(self, other, how='left', return_indexers=False): | |
return join_index | ||
|
||
def _wrap_joined_index(self, joined, other): | ||
name = self.name if self.name == other.name else None | ||
name = get_op_result_name(self, other) | ||
return Index(joined, name=name) | ||
|
||
def _get_string_slice(self, key, use_lhs=True, use_rhs=True): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,7 @@ | |
import pandas.core.base as base | ||
import pandas.core.missing as missing | ||
import pandas.core.indexes.base as ibase | ||
from pandas.core.ops import get_op_result_name | ||
|
||
_index_doc_kwargs = dict(ibase._index_doc_kwargs) | ||
_index_doc_kwargs.update(dict(target_klass='CategoricalIndex')) | ||
|
@@ -300,6 +301,12 @@ def itemsize(self): | |
# Size of the items in categories, not codes. | ||
return self.values.itemsize | ||
|
||
def _wrap_setop_result(self, other, result): | ||
name = get_op_result_name(self, other) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use _shallow_copy here, we never call _simple_new directly. (another reason why the super method should use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you move this method to Index.base? does it break anything? |
||
return self._simple_new(result, name=name, | ||
categories=self.categories, | ||
ordered=self.ordered) | ||
|
||
def get_values(self): | ||
""" return the underlying data as an ndarray """ | ||
return self._data.get_values() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,7 @@ | |
|
||
from pandas.core.indexes.base import Index, _index_shared_docs | ||
from pandas.core.indexes.numeric import Int64Index, Float64Index | ||
from pandas.core.ops import get_op_result_name | ||
import pandas.compat as compat | ||
from pandas.tseries.frequencies import to_offset, get_period_alias, Resolution | ||
from pandas.core.indexes.datetimelike import ( | ||
|
@@ -1136,6 +1137,11 @@ def union(self, other): | |
y : Index or DatetimeIndex | ||
""" | ||
self._assert_can_do_setop(other) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you just call the super There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
is_corner_case, corner_result = self._union_corner_case(other) | ||
if is_corner_case: | ||
return corner_result | ||
|
||
if not isinstance(other, DatetimeIndex): | ||
try: | ||
other = DatetimeIndex(other) | ||
|
@@ -1237,7 +1243,7 @@ def _maybe_utc_convert(self, other): | |
return this, other | ||
|
||
def _wrap_joined_index(self, joined, other): | ||
name = self.name if self.name == other.name else None | ||
name = get_op_result_name(self, other) | ||
if (isinstance(other, DatetimeIndex) and | ||
self.offset == other.offset and | ||
self._can_fast_union(other)): | ||
|
@@ -1333,8 +1339,8 @@ def __iter__(self): | |
box="timestamp") | ||
return iter(converted) | ||
|
||
def _wrap_union_result(self, other, result): | ||
name = self.name if self.name == other.name else None | ||
def _wrap_setop_result(self, other, result): | ||
name = get_op_result_name(self, other) | ||
if not timezones.tz_compare(self.tz, other.tz): | ||
raise ValueError('Passed item and index have different timezone') | ||
return self._simple_new(result, name=name, freq=None, tz=self.tz) | ||
|
@@ -1353,6 +1359,10 @@ def intersection(self, other): | |
y : Index or DatetimeIndex | ||
""" | ||
self._assert_can_do_setop(other) | ||
|
||
if self.equals(other): | ||
return self._get_reconciled_name_object(other) | ||
|
||
if not isinstance(other, DatetimeIndex): | ||
try: | ||
other = DatetimeIndex(other) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
_ensure_object) | ||
from pandas.core.dtypes.dtypes import PeriodDtype | ||
from pandas.core.dtypes.generic import ABCSeries | ||
from pandas.core.ops import get_op_result_name | ||
|
||
import pandas.tseries.frequencies as frequencies | ||
from pandas.tseries.frequencies import get_freq_code as _gfc | ||
|
@@ -996,8 +997,8 @@ def _assert_can_do_setop(self, other): | |
msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) | ||
raise IncompatibleFrequency(msg) | ||
|
||
def _wrap_union_result(self, other, result): | ||
name = self.name if self.name == other.name else None | ||
def _wrap_setop_result(self, other, result): | ||
name = get_op_result_name(self, other) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think could just remove this entirely here if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The reason we can't move it to base from category is because in the case of category, we need to preserve the categories, and in the case of period, we need to preserve frequency. So we could investigate in a separate PR about moving the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah that's fine |
||
result = self._apply_meta(result) | ||
result.name = name | ||
return result | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -256,7 +256,8 @@ def tolist(self): | |
@Appender(_index_shared_docs['_shallow_copy']) | ||
def _shallow_copy(self, values=None, **kwargs): | ||
if values is None: | ||
return RangeIndex(name=self.name, fastpath=True, | ||
name = kwargs.get("name", self.name) | ||
return RangeIndex(name=name, fastpath=True, | ||
**dict(self._get_data_as_items())) | ||
else: | ||
kwargs.setdefault('name', self.name) | ||
|
@@ -337,6 +338,10 @@ def intersection(self, other): | |
------- | ||
intersection : Index | ||
""" | ||
|
||
if self.equals(other): | ||
return self._get_reconciled_name_object(other) | ||
|
||
if not isinstance(other, RangeIndex): | ||
return super(RangeIndex, self).intersection(other) | ||
|
||
|
@@ -417,10 +422,10 @@ def union(self, other): | |
union : Index | ||
""" | ||
self._assert_can_do_setop(other) | ||
if len(other) == 0 or self.equals(other): | ||
return self | ||
if len(self) == 0: | ||
return other | ||
is_corner_case, corner_result = self._union_corner_case(other) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here |
||
if is_corner_case: | ||
return corner_result | ||
|
||
if isinstance(other, RangeIndex): | ||
start_s, step_s = self._start, self._step | ||
end_s = self._start + self._step * (len(self) - 1) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ | |
from pandas.core.base import _shared_docs | ||
from pandas.core.indexes.base import _index_shared_docs | ||
import pandas.core.common as com | ||
from pandas.core.ops import get_op_result_name | ||
import pandas.core.dtypes.concat as _concat | ||
from pandas.util._decorators import Appender, Substitution, deprecate_kwarg | ||
from pandas.core.indexes.datetimelike import TimelikeOps, DatetimeIndexOpsMixin | ||
|
@@ -545,6 +546,11 @@ def union(self, other): | |
y : Index or TimedeltaIndex | ||
""" | ||
self._assert_can_do_setop(other) | ||
|
||
is_corner_case, corner_result = self._union_corner_case(other) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and here |
||
if is_corner_case: | ||
return corner_result | ||
|
||
if not isinstance(other, TimedeltaIndex): | ||
try: | ||
other = TimedeltaIndex(other) | ||
|
@@ -577,7 +583,7 @@ def join(self, other, how='left', level=None, return_indexers=False, | |
sort=sort) | ||
|
||
def _wrap_joined_index(self, joined, other): | ||
name = self.name if self.name == other.name else None | ||
name = get_op_result_name(self, other) | ||
if (isinstance(other, TimedeltaIndex) and self.freq == other.freq and | ||
self._can_fast_union(other)): | ||
joined = self._shallow_copy(joined, name=name) | ||
|
@@ -637,10 +643,6 @@ def _fast_union(self, other): | |
else: | ||
return left | ||
|
||
def _wrap_union_result(self, other, result): | ||
name = self.name if self.name == other.name else None | ||
return self._simple_new(result, name=name, freq=None) | ||
|
||
def intersection(self, other): | ||
""" | ||
Specialized intersection for TimedeltaIndex objects. May be much faster | ||
|
@@ -655,6 +657,10 @@ def intersection(self, other): | |
y : Index or TimedeltaIndex | ||
""" | ||
self._assert_can_do_setop(other) | ||
|
||
if self.equals(other): | ||
return self._get_reconciled_name_object(other) | ||
|
||
if not isinstance(other, TimedeltaIndex): | ||
try: | ||
other = TimedeltaIndex(other) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think might be able to dispense with this entirely, and just always do
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do this in
_wrap_setup_result
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So if I eliminate
self.get_setop_name_object
and use_wrap_setop_result
, other tests fail, because those tests assume the corner cases return the same object.