Skip to content

BUG: name retention in Index.intersection #38111

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Dec 2, 2020
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,7 @@ Other
- Fixed bug in metadata propagation incorrectly copying DataFrame columns as metadata when the column name overlaps with the metadata name (:issue:`37037`)
- Fixed metadata propagation in the :class:`Series.dt`, :class:`Series.str` accessors, :class:`DataFrame.duplicated`, :class:`DataFrame.stack`, :class:`DataFrame.unstack`, :class:`DataFrame.pivot`, :class:`DataFrame.append`, :class:`DataFrame.diff`, :class:`DataFrame.applymap` and :class:`DataFrame.update` methods (:issue:`28283`, :issue:`37381`)
- Fixed metadata propagation when selecting columns with ``DataFrame.__getitem__`` (:issue:`28283`)
- Bug in :meth:`Index.intersection` with non-:class:`Index` arguments failing to set the correct name on the returned :class:`Index` (:issue:`38111`)
- Bug in :meth:`Index.union` behaving differently depending on whether operand is an :class:`Index` or other list-like (:issue:`36384`)
- Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError`` rather than a bare ``Exception`` (:issue:`35744`)
- Bug in ``dir`` where ``dir(obj)`` wouldn't show attributes defined on the instance for pandas objects (:issue:`37173`)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2820,7 +2820,7 @@ def intersection(self, other, sort=False):
"""
self._validate_sort_keyword(sort)
self._assert_can_do_setop(other)
other = ensure_index(other)
other, _ = self._convert_can_do_setop(other)

if self.equals(other):
return self._get_reconciled_name_object(other)
Expand Down
10 changes: 8 additions & 2 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,10 +686,15 @@ def intersection(self, other, sort=False):
"""
self._validate_sort_keyword(sort)
self._assert_can_do_setop(other)
other, _ = self._convert_can_do_setop(other)

if self.equals(other):
return self._get_reconciled_name_object(other)

return self._intersection(other, sort=sort)

def _intersection(self, other, sort=False):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add type annotations if you can (and a docstring if needed).


if len(self) == 0:
return self.copy()._get_reconciled_name_object(other)
if len(other) == 0:
Expand All @@ -704,10 +709,11 @@ def intersection(self, other, sort=False):
return result

elif not self._can_fast_intersect(other):
result = Index.intersection(self, other, sort=sort)
# We need to invalidate the freq because Index.intersection
result = Index._intersection(self, other, sort=sort)
# We need to invalidate the freq because Index._intersection
# uses _shallow_copy on a view of self._data, which will preserve
# self.freq if we're not careful.
result = self._wrap_setop_result(other, result)
return result._with_freq(None)._with_freq("infer")

# to make our life easier, "sort" the two ranges
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,11 @@ def setop_check(method):
def wrapped(self, other, sort=False):
self._validate_sort_keyword(sort)
self._assert_can_do_setop(other)
other = ensure_index(other)
other, _ = self._convert_can_do_setop(other)

if op_name == "intersection":
if self.equals(other):
return self._get_reconciled_name_object(other)

if not isinstance(other, IntervalIndex):
result = getattr(self.astype(object), op_name)(other)
Expand Down
12 changes: 9 additions & 3 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3598,10 +3598,15 @@ def intersection(self, other, sort=False):
"""
self._validate_sort_keyword(sort)
self._assert_can_do_setop(other)
other, result_names = self._convert_can_do_setop(other)
other, _ = self._convert_can_do_setop(other)

if self.equals(other):
return self.rename(result_names)
return self._get_reconciled_name_object(other)

return self._intersection(other, sort=sort)

def _intersection(self, other, sort=False):
other, result_names = self._convert_can_do_setop(other)

if not is_object_dtype(other.dtype):
# The intersection is empty
Expand Down Expand Up @@ -3717,11 +3722,12 @@ def _convert_can_do_setop(self, other):
levels=[[]] * self.nlevels,
codes=[[]] * self.nlevels,
verify_integrity=False,
names=self.names,
)
else:
msg = "other must be a MultiIndex or a list of tuples"
try:
other = MultiIndex.from_tuples(other)
other = MultiIndex.from_tuples(other, names=self.names)
except TypeError as err:
raise TypeError(msg) from err
else:
Expand Down
10 changes: 7 additions & 3 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,15 +639,19 @@ def _setop(self, other, sort, opname: str):
def intersection(self, other, sort=False):
self._validate_sort_keyword(sort)
self._assert_can_do_setop(other)
other = ensure_index(other)
other, _ = self._convert_can_do_setop(other)

if self.equals(other):
return self._get_reconciled_name_object(other)

elif is_object_dtype(other.dtype):
return self._intersection(other, sort=sort)

def _intersection(self, other, sort=False):

if is_object_dtype(other.dtype):
return self.astype("O").intersection(other, sort=sort)

elif not is_dtype_equal(self.dtype, other.dtype):
elif not self._is_comparable_dtype(other.dtype):
# We can infer that the intersection is empty.
# assert_can_do_setop ensures that this is not just a mismatched freq
this = self[:0].astype("O")
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,12 +504,19 @@ def intersection(self, other, sort=False):
intersection : Index
"""
self._validate_sort_keyword(sort)
self._assert_can_do_setop(other)
other, _ = self._convert_can_do_setop(other)

if self.equals(other):
return self._get_reconciled_name_object(other)

return self._intersection(other, sort=sort)

def _intersection(self, other, sort=False):

if not isinstance(other, RangeIndex):
return super().intersection(other, sort=sort)
result = super()._intersection(other, sort=sort)
return self._wrap_setop_result(other, result)

if not len(self) or not len(other):
return self._simple_new(_empty_range)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexes/datetimes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,10 +471,11 @@ def test_intersection_bug(self):

def test_intersection_list(self):
# GH#35876
# values is not an Index -> no name -> retain "a"
values = [pd.Timestamp("2020-01-01"), pd.Timestamp("2020-02-01")]
idx = DatetimeIndex(values, name="a")
res = idx.intersection(values)
tm.assert_index_equal(res, idx.rename(None))
tm.assert_index_equal(res, idx)

def test_month_range_union_tz_pytz(self, sort):
from pytz import timezone
Expand Down
36 changes: 31 additions & 5 deletions pandas/tests/indexes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,20 @@ def test_compatible_inconsistent_pairs(idx_fact1, idx_fact2):
("Period[D]", "float64", "object"),
],
)
def test_union_dtypes(left, right, expected):
@pytest.mark.parametrize("names", [("foo", "foo", "foo"), ("foo", "bar", None)])
def test_union_dtypes(left, right, expected, names):
left = pandas_dtype(left)
right = pandas_dtype(right)
a = pd.Index([], dtype=left)
b = pd.Index([], dtype=right)
result = a.union(b).dtype
assert result == expected
a = pd.Index([], dtype=left, name=names[0])
b = pd.Index([], dtype=right, name=names[1])
result = a.union(b)
assert result.dtype == expected
assert result.name == names[2]

# Testing name retention
# TODO: pin down desired dtype; do we want it to be commutative?
result = a.intersection(b)
assert result.name == names[2]


def test_dunder_inplace_setops_deprecated(index):
Expand Down Expand Up @@ -378,6 +385,25 @@ def test_intersect_unequal(self, index, fname, sname, expected_name):
expected = index[1:].set_names(expected_name).sort_values()
tm.assert_index_equal(intersect, expected)

def test_intersection_name_retention_with_nameless(self, index):
# GH#38111
# Intersecting a named index with a plain ndarray of its own values
# should give a result whose ``name`` equals that of the calling index.
# Covers three cases: full overlap, empty ``other`` and empty ``self``.
#
# Parameters: ``index`` is supplied by the test suite (a fixture,
# presumably parametrized over the Index subclasses -- TODO confirm).
if isinstance(index, MultiIndex):
# one integer name per level: 0, 1, ..., nlevels - 1
index = index.rename(list(range(index.nlevels)))
else:
index = index.rename("foo")

# np.asarray yields an ndarray, i.e. the non-Index operand the fix is about
other = np.asarray(index)

# full overlap: ``other`` was built from ``index`` itself
result = index.intersection(other)
assert result.name == index.name
# NOTE(review): for MultiIndex the level names set above are presumably
# exposed via ``.names`` rather than ``.name`` -- confirm that this
# comparison actually exercises them.

# empty other, same dtype
result = index.intersection(other[:0])
assert result.name == index.name

# empty `self`
result = index[:0].intersection(other)
assert result.name == index.name

def test_difference_preserves_type_empty(self, index, sort):
# GH#20040
# If taking difference of a set and itself, it
Expand Down