-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: names on union and intersection for Index were inconsistent (GH9943 GH9862) #19849
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
aad5884
3ef2a69
ba9ccdd
38aeacb
fb3a9db
f72377a
08eb291
396088f
41cd000
a48f734
b977e52
a030ede
9ba6960
d1620f6
893efb8
fd7f23f
2d1c366
fa7311a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,7 +61,7 @@ | |
import pandas.core.sorting as sorting | ||
from pandas.io.formats.printing import ( | ||
pprint_thing, default_pprint, format_object_summary, format_object_attrs) | ||
from pandas.core.ops import make_invalid_op | ||
from pandas.core.ops import make_invalid_op, get_op_result_name | ||
from pandas.core.strings import StringMethods | ||
|
||
__all__ = ['Index'] | ||
|
@@ -1253,7 +1253,7 @@ def _convert_can_do_setop(self, other): | |
other = Index(other, name=self.name) | ||
result_name = self.name | ||
else: | ||
result_name = self.name if self.name == other.name else None | ||
result_name = get_op_result_name(self, other) | ||
return other, result_name | ||
|
||
def _convert_for_op(self, value): | ||
|
@@ -2745,19 +2745,15 @@ def __or__(self, other): | |
def __xor__(self, other): | ||
return self.symmetric_difference(other) | ||
|
||
def _get_consensus_name(self, other): | ||
def _get_reconciled_name_object(self, other): | ||
""" | ||
Given 2 indexes, give a consensus name meaning | ||
we take the not None one, or None if the names differ. | ||
Return a new object if we are resetting the name | ||
If the result of a set operation will be self, | ||
return self, unless the name changes, in which | ||
case make a shallow copy of self. | ||
""" | ||
if self.name != other.name: | ||
if self.name is None or other.name is None: | ||
name = self.name or other.name | ||
else: | ||
name = None | ||
if self.name != name: | ||
return self._shallow_copy(name=name) | ||
name = get_op_result_name(self, other) | ||
if self.name != name: | ||
return self._shallow_copy(name=name) | ||
return self | ||
|
||
def union(self, other): | ||
|
@@ -2785,10 +2781,10 @@ def union(self, other): | |
other = ensure_index(other) | ||
|
||
if len(other) == 0 or self.equals(other): | ||
return self._get_consensus_name(other) | ||
return self._get_reconciled_name_object(other) | ||
|
||
if len(self) == 0: | ||
return other._get_consensus_name(self) | ||
return other._get_reconciled_name_object(self) | ||
|
||
# TODO: is_dtype_union_equal is a hack around | ||
# 1. buggy set ops with duplicates (GH #13432) | ||
|
@@ -2851,11 +2847,10 @@ def union(self, other): | |
stacklevel=3) | ||
|
||
# for subclasses | ||
return self._wrap_union_result(other, result) | ||
return self._wrap_setop_result(other, result) | ||
|
||
def _wrap_union_result(self, other, result): | ||
name = self.name if self.name == other.name else None | ||
return self.__class__(result, name=name) | ||
def _wrap_setop_result(self, other, result): | ||
return self._constructor(result, name=get_op_result_name(self, other)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would shallowcopy work here? Constructor isn’t used very often in the indexes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jbrockmendel repeating (part of) reply from a few months ago:
|
||
|
||
def intersection(self, other): | ||
""" | ||
|
@@ -2885,7 +2880,7 @@ def intersection(self, other): | |
other = ensure_index(other) | ||
|
||
if self.equals(other): | ||
return self._get_consensus_name(other) | ||
return self._get_reconciled_name_object(other) | ||
|
||
if not is_dtype_equal(self.dtype, other.dtype): | ||
this = self.astype('O') | ||
|
@@ -2905,7 +2900,7 @@ def intersection(self, other): | |
if self.is_monotonic and other.is_monotonic: | ||
try: | ||
result = self._inner_indexer(lvals, rvals)[0] | ||
return self._wrap_union_result(other, result) | ||
return self._wrap_setop_result(other, result) | ||
except TypeError: | ||
pass | ||
|
||
|
@@ -4175,7 +4170,7 @@ def _join_monotonic(self, other, how='left', return_indexers=False): | |
return join_index | ||
|
||
def _wrap_joined_index(self, joined, other): | ||
name = self.name if self.name == other.name else None | ||
name = get_op_result_name(self, other) | ||
return Index(joined, name=name) | ||
|
||
def _get_string_slice(self, key, use_lhs=True, use_rhs=True): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,7 @@ | |
import pandas.core.common as com | ||
import pandas.core.missing as missing | ||
import pandas.core.indexes.base as ibase | ||
from pandas.core.ops import get_op_result_name | ||
from pandas.core.arrays.categorical import Categorical, contains | ||
|
||
_index_doc_kwargs = dict(ibase._index_doc_kwargs) | ||
|
@@ -324,6 +325,10 @@ def itemsize(self): | |
# Size of the items in categories, not codes. | ||
return self.values.itemsize | ||
|
||
def _wrap_setop_result(self, other, result): | ||
name = get_op_result_name(self, other) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use _shallow_copy here, we never call _simple_new directly. (another reason why the super method should use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you move this method to Index.base? does it break anything? |
||
return self._shallow_copy(result, name=name) | ||
|
||
def get_values(self): | ||
""" return the underlying data as an ndarray """ | ||
return self._data.get_values() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,7 @@ | |
|
||
from pandas.core.indexes.base import Index, _index_shared_docs | ||
from pandas.core.indexes.numeric import Int64Index | ||
from pandas.core.ops import get_op_result_name | ||
import pandas.compat as compat | ||
from pandas.tseries.frequencies import to_offset, Resolution | ||
from pandas.core.indexes.datetimelike import ( | ||
|
@@ -592,6 +593,10 @@ def union(self, other): | |
y : Index or DatetimeIndex | ||
""" | ||
self._assert_can_do_setop(other) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you just call the super There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
if len(other) == 0 or self.equals(other) or len(self) == 0: | ||
return super(DatetimeIndex, self).union(other) | ||
|
||
if not isinstance(other, DatetimeIndex): | ||
try: | ||
other = DatetimeIndex(other) | ||
|
@@ -674,7 +679,7 @@ def _maybe_utc_convert(self, other): | |
return this, other | ||
|
||
def _wrap_joined_index(self, joined, other): | ||
name = self.name if self.name == other.name else None | ||
name = get_op_result_name(self, other) | ||
if (isinstance(other, DatetimeIndex) and | ||
self.freq == other.freq and | ||
self._can_fast_union(other)): | ||
|
@@ -745,11 +750,11 @@ def _fast_union(self, other): | |
else: | ||
return left | ||
|
||
def _wrap_union_result(self, other, result): | ||
name = self.name if self.name == other.name else None | ||
def _wrap_setop_result(self, other, result): | ||
name = get_op_result_name(self, other) | ||
if not timezones.tz_compare(self.tz, other.tz): | ||
raise ValueError('Passed item and index have different timezone') | ||
return self._simple_new(result, name=name, freq=None, tz=self.tz) | ||
return self._shallow_copy(result, name=name, freq=None, tz=self.tz) | ||
|
||
def intersection(self, other): | ||
""" | ||
|
@@ -765,6 +770,10 @@ def intersection(self, other): | |
y : Index or DatetimeIndex | ||
""" | ||
self._assert_can_do_setop(other) | ||
|
||
if self.equals(other): | ||
return self._get_reconciled_name_object(other) | ||
|
||
if not isinstance(other, DatetimeIndex): | ||
try: | ||
other = DatetimeIndex(other) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,9 +12,9 @@ | |
is_integer_dtype, | ||
is_datetime64_any_dtype, | ||
is_bool_dtype, | ||
pandas_dtype, | ||
pandas_dtype | ||
) | ||
|
||
from pandas.core.ops import get_op_result_name | ||
from pandas.core.accessor import PandasDelegate, delegate_names | ||
from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index | ||
from pandas.core.indexes.datetimelike import ( | ||
|
@@ -848,8 +848,8 @@ def _assert_can_do_setop(self, other): | |
msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) | ||
raise IncompatibleFrequency(msg) | ||
|
||
def _wrap_union_result(self, other, result): | ||
name = self.name if self.name == other.name else None | ||
def _wrap_setop_result(self, other, result): | ||
name = get_op_result_name(self, other) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think could just remove this entirely here if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The reason we can't move it to base from category is because in the case of category, we need to preserve the categories, and in the case of period, we need to preserve frequency. So we could investigate in a separate PR about moving the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah that's fine |
||
result = self._apply_meta(result) | ||
result.name = name | ||
return result | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think might be able to dispense with this entirely, and just always do
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do this in
_wrap_setup_result
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So if I eliminate
self.get_setop_name_object
and use_wrap_setop_result
, other tests fail, because those tests assume the corner cases return the same object.