diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index afb2f91f65ccd..7639d75af89e1 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -624,6 +624,7 @@ Other
 - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
 - Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
 - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
+- Bug in :meth:`Index.difference` returning incorrect values (too many elements) for a :class:`PeriodIndex` when ``other`` is an :class:`Index` of strings (:issue:`58971`)
 - Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`)
 - Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
 - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`)
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 71dfff520113c..405491f2644f4 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3406,7 +3406,7 @@ def difference(self, other, sort=None):
         return self._wrap_difference_result(other, result)
 
     def _difference(self, other, sort):
-        # overridden by RangeIndex
+        # overridden by RangeIndex, PeriodIndex
         this = self
         if isinstance(self, ABCCategoricalIndex) and self.hasnans and other.hasnans:
             this = this.dropna()
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
index edd1fdd4da943..e673b31b56755 100644
--- a/pandas/core/indexes/period.py
+++ b/pandas/core/indexes/period.py
@@ -514,6 +514,25 @@ def shift(self, periods: int = 1, freq=None) -> Self:
             )
         return self + periods
 
+    def _difference(self, other, sort=None) -> PeriodIndex:
+        """
+        Compute the set difference, treating string labels as periods.
+
+        Overrides the base ``Index._difference``. When ``other`` is an Index
+        whose inferred type is ``"string"``, it is first cast to this index's
+        dtype so that its labels are compared as periods.
+        """
+        if isinstance(other, Index) and other.inferred_type == "string":
+            # Best-effort cast: if the strings cannot be converted to this
+            # period dtype, keep ``other`` unchanged and defer to the base
+            # implementation.
+            try:
+                other = other.astype(self.dtype)
+            except (TypeError, ValueError):
+                pass
+
+        return super()._difference(other, sort=sort)
+
 
 def period_range(
     start=None,
diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py
index 2fa7e8cd0d2df..3d31f85975592 100644
--- a/pandas/tests/indexes/period/test_setops.py
+++ b/pandas/tests/indexes/period/test_setops.py
@@ -314,6 +314,21 @@ def test_difference(self, sort):
                 expected = expected.sort_values()
             tm.assert_index_equal(result_difference, expected)
 
+    def test_difference_mismatched_dtypes(self, sort):
+        # GH58971
+        index = period_range("2022-01-01", periods=5, freq="M")
+        other = pd.Index(["2022-02", "2022-03"])
+
+        # string labels in ``other`` must be matched against the periods
+        idx_diff = index.difference(other, sort)
+        expected = PeriodIndex(["2022-01", "2022-04", "2022-05"], freq="M")
+        tm.assert_index_equal(idx_diff, expected)
+
+        # reverse direction: every label in ``other`` is present in ``index``
+        idx_diff = other.difference(index, sort)
+        expected = pd.Index([])
+        tm.assert_index_equal(idx_diff, expected)
+
     def test_difference_freq(self, sort):
         # GH14323: difference of Period MUST preserve frequency
         # but the ability to union results must be preserved