From 179e97c9c9e2ed0d5db600a041b67f1098dbc25f Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 1 Dec 2020 00:19:28 +0100 Subject: [PATCH 1/3] BUG: Avoid duplicates in DatetimeIndex.intersection --- pandas/core/indexes/datetimelike.py | 2 +- pandas/tests/indexes/datetimes/test_setops.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 28ff5a8bacc71..c573fd23f8f92 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -687,7 +687,7 @@ def intersection(self, other, sort=False): self._validate_sort_keyword(sort) self._assert_can_do_setop(other) - if self.equals(other): + if self.equals(other) and not self.has_duplicates: return self._get_reconciled_name_object(other) if len(self) == 0: diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index c8edd30e3f7aa..e0288908fc407 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -509,6 +509,19 @@ def test_month_range_union_tz_dateutil(self, sort): early_dr.union(late_dr, sort=sort) + def test_intersection_duplicates(self): + # GH# + idx1 = Index( + [ + pd.Timestamp("2019-12-12"), + pd.Timestamp("2019-12-13"), + pd.Timestamp("2019-12-12"), + ] + ) + result = idx1.intersection(idx1) + expected = Index([pd.Timestamp("2019-12-12"), pd.Timestamp("2019-12-13")]) + tm.assert_index_equal(result, expected) + class TestCustomDatetimeIndex: def setup_method(self, method): From 3e13e91320a5d3c2ada0633287450f02d6d93238 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 1 Dec 2020 20:39:58 +0100 Subject: [PATCH 2/3] Handle duplicates case --- pandas/core/indexes/datetimelike.py | 5 ++++- pandas/tests/indexes/datetimes/test_setops.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index c573fd23f8f92..2dc13ff2fd682 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -686,8 +686,11 @@ def intersection(self, other, sort=False): """ self._validate_sort_keyword(sort) self._assert_can_do_setop(other) + other, _ = self._convert_can_do_setop(other) - if self.equals(other) and not self.has_duplicates: + if self.equals(other): + if self.has_duplicates: + return self.unique()._get_reconciled_name_object(other) return self._get_reconciled_name_object(other) if len(self) == 0: diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index e0288908fc407..a21101be1169b 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -474,7 +474,7 @@ def test_intersection_list(self): values = [pd.Timestamp("2020-01-01"), pd.Timestamp("2020-02-01")] idx = DatetimeIndex(values, name="a") res = idx.intersection(values) - tm.assert_index_equal(res, idx.rename(None)) + tm.assert_index_equal(res, idx) def test_month_range_union_tz_pytz(self, sort): from pytz import timezone From fd52a407f17fa5a5b7b5e8b0df47778b14f08657 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 1 Dec 2020 20:43:11 +0100 Subject: [PATCH 3/3] Parametrize test --- pandas/tests/indexes/datetimes/test_setops.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index a21101be1169b..0127493888214 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -509,17 +509,18 @@ def test_month_range_union_tz_dateutil(self, sort): early_dr.union(late_dr, sort=sort) - def test_intersection_duplicates(self): - # GH# + @pytest.mark.parametrize("sort", [False, None]) + def test_intersection_duplicates(self, sort): + # GH#38196 idx1 = Index( [ - pd.Timestamp("2019-12-12"), pd.Timestamp("2019-12-13"), pd.Timestamp("2019-12-12"), + pd.Timestamp("2019-12-12"), ] ) - result = idx1.intersection(idx1) - expected = Index([pd.Timestamp("2019-12-12"), pd.Timestamp("2019-12-13")]) + result = idx1.intersection(idx1, sort=sort) + expected = Index([pd.Timestamp("2019-12-13"), pd.Timestamp("2019-12-12")]) tm.assert_index_equal(result, expected)