From 823a7d68899aebcc65291af81ae5c32bd736c86c Mon Sep 17 00:00:00 2001 From: furukawas Date: Tue, 28 May 2024 01:21:08 +0900 Subject: [PATCH 1/4] BUG: DataFrame.drop unexpectedly drop frequency --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/indexes/base.py | 10 +++++++++- pandas/tests/generic/test_generic.py | 16 ++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a15da861cfbec..27d8792797e39 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -388,6 +388,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ - Bug in :class:`Timestamp` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``tzinfo`` or data (:issue:`48688`) +- Bug in :meth:`DataFrame.drop` returning ``Freq=None`` when the dataframe has a ``DatetimeIndex`` (:issue:`58743`) - Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`) - Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`) - Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6a3fb8bc851df..aa1f714f943cc 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6949,7 +6949,15 @@ def drop( if errors != "ignore": raise KeyError(f"{labels[mask].tolist()} not found in axis") indexer = indexer[~mask] - return self.delete(indexer) + new_index = self.delete(indexer) + + # check if we need to set the freq attribute + from pandas import DatetimeIndex + + if isinstance(self, DatetimeIndex): + new_index.freq = self.freq + + return new_index @final def infer_objects(self, copy: bool = True) -> Index: diff --git a/pandas/tests/generic/test_generic.py
b/pandas/tests/generic/test_generic.py index 0b607d91baf65..236a84002c10a 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -3,6 +3,7 @@ deepcopy, ) +from datetime import datetime import numpy as np import pytest @@ -12,6 +13,7 @@ DataFrame, Index, Series, + Timestamp, date_range, ) import pandas._testing as tm @@ -483,3 +485,17 @@ def test_flags_identity(self, frame_or_series): assert obj.flags is obj.flags obj2 = obj.copy() assert obj2.flags is not obj.flags + + @pytest.mark.parametrize("freq", ["Y", "M", "D"]) + def test_drop_method_freq_preservation(self, freq): + start = "1970-01-01" + index = date_range(start=start, periods=10, freq=freq) + df = DataFrame((np.ones(len(index))), index=index) + + # set inplace as false + test_df = df.drop(index=df.index[0], inplace=False) + tm.assert_equal(test_df.index.freq, index.freq) + + # set inplace as true + df.drop(index=df.index[0], inplace=True) + tm.assert_equal(df.index.freq, index.freq) From f279c6191923f993c6171650e987b5025aeae6de Mon Sep 17 00:00:00 2001 From: furukawas Date: Tue, 28 May 2024 12:03:26 +0900 Subject: [PATCH 2/4] modify to infer freq --- pandas/core/indexes/base.py | 3 ++- pandas/tests/generic/test_generic.py | 10 +++++----- pandas/tests/indexes/datetimes/methods/test_delete.py | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index aa1f714f943cc..bf2df54598e8d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -39,6 +39,7 @@ IncompatibleFrequency, OutOfBoundsDatetime, Timestamp, + to_offset, tz_compare, ) from pandas._typing import ( @@ -6955,7 +6956,7 @@ def drop( from pandas import DatetimeIndex if isinstance(self, DatetimeIndex): - new_index.freq = self.freq + new_index.freq = to_offset(new_index.inferred_freq) return new_index diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 
236a84002c10a..22b079027b3bc 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -3,7 +3,6 @@ deepcopy, ) -from datetime import datetime import numpy as np import pytest @@ -13,7 +12,6 @@ DataFrame, Index, Series, - Timestamp, date_range, ) import pandas._testing as tm @@ -486,16 +484,18 @@ def test_flags_identity(self, frame_or_series): obj2 = obj.copy() assert obj2.flags is not obj.flags - @pytest.mark.parametrize("freq", ["Y", "M", "D"]) + @pytest.mark.parametrize("freq", ["YE", "ME", "D"]) def test_drop_method_freq_preservation(self, freq): start = "1970-01-01" + periods = 10 + index = date_range(start=start, periods=10, freq=freq) df = DataFrame((np.ones(len(index))), index=index) # set inplace as false - test_df = df.drop(index=df.index[0], inplace=False) + test_df = df.drop(index=df.index[[0, periods - 1]], inplace=False) tm.assert_equal(test_df.index.freq, index.freq) # set inplace as true - df.drop(index=df.index[0], inplace=True) + df.drop(index=df.index[[0, periods - 1]], inplace=True) tm.assert_equal(df.index.freq, index.freq) diff --git a/pandas/tests/indexes/datetimes/methods/test_delete.py b/pandas/tests/indexes/datetimes/methods/test_delete.py index 2341499977f22..cc2913b4f8d6a 100644 --- a/pandas/tests/indexes/datetimes/methods/test_delete.py +++ b/pandas/tests/indexes/datetimes/methods/test_delete.py @@ -132,9 +132,9 @@ def test_delete_slice2(self, tz, unit): assert result.freq == expected.freq assert result.tz == expected.tz - # reset freq to None + # reset freq result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index - expected = dti[::2]._with_freq(None) + expected = dti[::2]._with_freq("infer") tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq From d8f2d98de79db4002a0ae5e907df100251051045 Mon Sep 17 00:00:00 2001 From: furukawas Date: Tue, 28 May 2024 15:45:54 +0900 Subject: [PATCH 3/4] fix pre-commit error --- pandas/core/indexes/base.py | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index bf2df54598e8d..c635f73d05379 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6955,7 +6955,7 @@ def drop( # check if we need to set the freq attribute from pandas import DatetimeIndex - if isinstance(self, DatetimeIndex): + if isinstance(self, DatetimeIndex) and isinstance(new_index, DatetimeIndex): new_index.freq = to_offset(new_index.inferred_freq) return new_index From f5176094edc6e2782a512664895539da551bd8c9 Mon Sep 17 00:00:00 2001 From: furukawas Date: Wed, 5 Jun 2024 13:30:53 +0900 Subject: [PATCH 4/4] override DatetimeIndex.drop --- pandas/core/indexes/base.py | 11 +---------- pandas/core/indexes/datetimes.py | 15 ++++++++++++++- pandas/tests/generic/test_generic.py | 3 ++- .../indexes/datetimes/methods/test_delete.py | 3 +-- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c635f73d05379..6a3fb8bc851df 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -39,7 +39,6 @@ IncompatibleFrequency, OutOfBoundsDatetime, Timestamp, - to_offset, tz_compare, ) from pandas._typing import ( @@ -6950,15 +6949,7 @@ def drop( if errors != "ignore": raise KeyError(f"{labels[mask].tolist()} not found in axis") indexer = indexer[~mask] - new_index = self.delete(indexer) - - # check if we need to set the freq attribute - from pandas import DatetimeIndex - - if isinstance(self, DatetimeIndex) and isinstance(new_index, DatetimeIndex): - new_index.freq = to_offset(new_index.inferred_freq) - - return new_index + return self.delete(indexer) @final def infer_objects(self, copy: bool = True) -> Index: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 78f04f57029b1..8b05581752ac8 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -48,12 +48,13 @@ from 
pandas.core.tools.times import to_time if TYPE_CHECKING: - from collections.abc import Hashable + from collections.abc import Hashable, Iterable from pandas._typing import ( Dtype, DtypeObj, Frequency, + IgnoreRaise, IntervalClosedType, Self, TimeAmbiguous, @@ -813,6 +814,18 @@ def indexer_between_time( return mask.nonzero()[0] + # -------------------------------------------------------------------- + + def drop( + self, + labels: Index | np.ndarray | Iterable[Hashable], + errors: IgnoreRaise = "raise", + ) -> DatetimeIndex: + if self.freq is not None: + return Index.drop(self, labels, errors)._with_freq("infer") # type: ignore[attr-defined] + else: + return Index.drop(self, labels, errors) # type: ignore[return-value] + def date_range( start=None, diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 22b079027b3bc..e2b1426fd62bb 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -486,10 +486,11 @@ def test_flags_identity(self, frame_or_series): @pytest.mark.parametrize("freq", ["YE", "ME", "D"]) def test_drop_method_freq_preservation(self, freq): + # GH 58846 start = "1970-01-01" periods = 10 - index = date_range(start=start, periods=10, freq=freq) + index = date_range(start=start, periods=periods, freq=freq) df = DataFrame((np.ones(len(index))), index=index) # set inplace as false diff --git a/pandas/tests/indexes/datetimes/methods/test_delete.py b/pandas/tests/indexes/datetimes/methods/test_delete.py index cc2913b4f8d6a..06822d6bc9004 100644 --- a/pandas/tests/indexes/datetimes/methods/test_delete.py +++ b/pandas/tests/indexes/datetimes/methods/test_delete.py @@ -132,9 +132,8 @@ def test_delete_slice2(self, tz, unit): assert result.freq == expected.freq assert result.tz == expected.tz - # reset freq result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index - expected = dti[::2]._with_freq("infer") + expected = dti[::2] tm.assert_index_equal(result, expected) assert result.name == 
expected.name assert result.freq == expected.freq