From 7a0865ac13c0aa9072eee9a61f25d6952368db7e Mon Sep 17 00:00:00 2001 From: "[Annika Rudolph]" <[annika.rudolph@analytical-software.de]> Date: Tue, 23 Apr 2024 14:08:02 +0200 Subject: [PATCH 1/5] add take function including frequency for Timedelta and Datetime Arrays --- pandas/core/arrays/datetimelike.py | 38 +++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index c90ff410b4b93..f184e04e1fd7c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -19,6 +19,7 @@ import numpy as np +from pandas.core.algorithms import take from pandas._config.config import get_option from pandas._libs import ( @@ -67,6 +68,7 @@ SequenceIndexer, TimeAmbiguous, TimeNonexistent, + TakeIndexer, npt, ) from pandas.compat.numpy import function as nv @@ -1768,7 +1770,7 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: ---------- freq : str or Offset The frequency level to {op} the index to. Must be a fixed - frequency like 's' (second) not 'ME' (month end). See + frequency like 'S' (second) not 'ME' (month end). See :ref:`frequency aliases ` for a list of possible `freq` values. ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' @@ -1806,11 +1808,6 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: ------ ValueError if the `freq` cannot be converted. - See Also - -------- - DatetimeIndex.floor : Perform floor operation on the data to the specified `freq`. - DatetimeIndex.snap : Snap time stamps to nearest occurring frequency. - Notes ----- If the timestamps have a timezone, {op}ing will take place relative to the @@ -2339,6 +2336,35 @@ def interpolate( if not copy: return self return type(self)._simple_new(out_data, dtype=self.dtype) + + def take( + self, + indices: TakeIndexer, + *, + allow_fill: bool = False, + fill_value: Any = None, + axis: AxisInt = 0, + ) -> Self: + + if allow_fill: + fill_value = self._validate_scalar(fill_value) + + new_data = take( + self._ndarray, + indices, + allow_fill=allow_fill, + fill_value=fill_value, + axis=axis, + ) + result = self._from_backing_data(new_data) + indices = np.asarray(indices, dtype=np.intp) + maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) + + if isinstance(maybe_slice, slice): + freq = self._get_getitem_freq(maybe_slice) + result.freq = freq + + return result # -------------------------------------------------------------- # Unsorted From 98a343a92e5112ae6d0f09e417d30cf0967e0036 Mon Sep 17 00:00:00 2001 From: "[Annika Rudolph]" <[annika.rudolph@analytical-software.de]> Date: Tue, 23 Apr 2024 14:09:00 +0200 Subject: [PATCH 2/5] add test for frequency of DatetimeIndex in MultiIndex --- pandas/core/arrays/datetimelike.py | 5 +++-- pandas/tests/indexes/multi/test_get_level_values.py | 11 +++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index f184e04e1fd7c..f1361c0f31f71 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -19,7 +19,6 @@ import numpy as np -from pandas.core.algorithms import take from pandas._config.config import get_option from pandas._libs import ( @@ -66,9 +65,9 @@ ScalarIndexer, Self, SequenceIndexer, + TakeIndexer, TimeAmbiguous, TimeNonexistent, - TakeIndexer, npt, ) from pandas.compat.numpy import function as nv @@ -118,6 +117,7 @@ from pandas.core.algorithms import ( isin, map_array, + take, unique1d, ) from pandas.core.array_algos import datetimelike_accumulations @@ -2357,6 +2357,7 @@ def take( axis=axis, ) result = self._from_backing_data(new_data) + indices = np.asarray(indices, dtype=np.intp) maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) diff --git a/pandas/tests/indexes/multi/test_get_level_values.py b/pandas/tests/indexes/multi/test_get_level_values.py index 28c77e78924cb..4b3c85106c941 100644 --- a/pandas/tests/indexes/multi/test_get_level_values.py +++ b/pandas/tests/indexes/multi/test_get_level_values.py @@ -122,3 +122,14 @@ def test_values_loses_freq_of_underlying_index(): midx.values assert idx.freq is not None tm.assert_index_equal(idx, expected) + + +def test_get_level_values_gets_frequency_correctly(): + # GH#57949 GH#58327 + datetime_index = pd.date_range(start=pd.to_datetime("1/1/2018"), + periods = 4, + freq = 'YS') + other_index = ["A"] + multi_index = pd.MultiIndex.from_product([datetime_index, other_index]) + + assert multi_index.get_level_values(0).freq is datetime_index.freq \ No newline at end of file From 8bab0a76ff61f0ab08c1dcc3a7e54a4b85137d09 Mon Sep 17 00:00:00 2001 From: annika-rudolph Date: Wed, 24 Apr 2024 08:28:47 +0200 Subject: [PATCH 3/5] use super() in take function --- pandas/core/arrays/datetimelike.py | 26 ++++++------------- .../indexes/multi/test_get_level_values.py | 10 +++---- 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index f1361c0f31f71..952fcc701a12a 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -117,7 +117,6 @@ from pandas.core.algorithms import ( isin, map_array, - take, unique1d, ) from pandas.core.array_algos import datetimelike_accumulations @@ -2336,7 +2335,7 @@ def interpolate( if not copy: return self return type(self)._simple_new(out_data, dtype=self.dtype) - + def take( self, indices: TakeIndexer, @@ -2345,27 +2344,18 @@ def take( fill_value: Any = None, axis: AxisInt = 0, ) -> Self: - - if allow_fill: - fill_value = self._validate_scalar(fill_value) - - new_data = take( - self._ndarray, - indices, - allow_fill=allow_fill, - fill_value=fill_value, - axis=axis, - ) - result = self._from_backing_data(new_data) - + result = super().take( + indices=indices, allow_fill=allow_fill, fill_value=fill_value, axis=axis + ) + indices = np.asarray(indices, dtype=np.intp) maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) - + if isinstance(maybe_slice, slice): freq = self._get_getitem_freq(maybe_slice) result.freq = freq - - return result + + return result # -------------------------------------------------------------- # Unsorted diff --git a/pandas/tests/indexes/multi/test_get_level_values.py b/pandas/tests/indexes/multi/test_get_level_values.py index 4b3c85106c941..cad918a5f94d5 100644 --- a/pandas/tests/indexes/multi/test_get_level_values.py +++ b/pandas/tests/indexes/multi/test_get_level_values.py @@ -126,10 +126,10 @@ def test_values_loses_freq_of_underlying_index(): def test_get_level_values_gets_frequency_correctly(): # GH#57949 GH#58327 - datetime_index = pd.date_range(start=pd.to_datetime("1/1/2018"), - periods = 4, - freq = 'YS') + datetime_index = pd.date_range( + start=pd.to_datetime("1/1/2018"), periods=4, freq="YS" + ) other_index = ["A"] - multi_index = pd.MultiIndex.from_product([datetime_index, other_index]) + multi_index = MultiIndex.from_product([datetime_index, other_index]) - assert multi_index.get_level_values(0).freq is datetime_index.freq \ No newline at end of file + assert multi_index.get_level_values(0).freq == datetime_index.freq From f2a777d7156a31c0dc6cf32c8b5810f3e0bdd95b Mon Sep 17 00:00:00 2001 From: annika-rudolph Date: Tue, 30 Apr 2024 17:13:09 +0200 Subject: [PATCH 4/5] add description to whatsnew and revert unwanted changes in datetimearray docstring make pre-commit happy --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/arrays/datetimelike.py | 7 ++++++- pandas/tests/indexes/multi/test_get_level_values.py | 4 +--- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index cd917924880f1..639655ab28199 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -556,6 +556,7 @@ MultiIndex - :func:`DataFrame.loc` with ``axis=0`` and :class:`MultiIndex` when setting a value adds extra columns (:issue:`58116`) - :meth:`DataFrame.melt` would not accept multiple names in ``var_name`` when the columns were a :class:`MultiIndex` (:issue:`58033`) - :meth:`MultiIndex.insert` would not insert NA value correctly at unified location of index -1 (:issue:`59003`) +- :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`) - I/O diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 952fcc701a12a..d53a952bc523c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1769,7 +1769,7 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: ---------- freq : str or Offset The frequency level to {op} the index to. Must be a fixed - frequency like 'S' (second) not 'ME' (month end). See + frequency like 's' (second) not 'ME' (month end). See :ref:`frequency aliases ` for a list of possible `freq` values. ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' @@ -1807,6 +1807,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: ------ ValueError if the `freq` cannot be converted. + See Also + -------- + DatetimeIndex.floor : Perform floor operation on the data to the specified `freq`. + DatetimeIndex.snap : Snap time stamps to nearest occurring frequency. + Notes ----- If the timestamps have a timezone, {op}ing will take place relative to the diff --git a/pandas/tests/indexes/multi/test_get_level_values.py b/pandas/tests/indexes/multi/test_get_level_values.py index cad918a5f94d5..4db74a716c514 100644 --- a/pandas/tests/indexes/multi/test_get_level_values.py +++ b/pandas/tests/indexes/multi/test_get_level_values.py @@ -126,9 +126,7 @@ def test_values_loses_freq_of_underlying_index(): def test_get_level_values_gets_frequency_correctly(): # GH#57949 GH#58327 - datetime_index = pd.date_range( - start=pd.to_datetime("1/1/2018"), periods=4, freq="YS" - ) + datetime_index = date_range(start=pd.to_datetime("1/1/2018"), periods=4, freq="YS") other_index = ["A"] multi_index = MultiIndex.from_product([datetime_index, other_index]) From 0dd9feb467720fe4240c3fddfcfabae308401cfd Mon Sep 17 00:00:00 2001 From: annika-rudolph Date: Fri, 10 May 2024 08:53:26 +0200 Subject: [PATCH 5/5] switch .freq to ._freq --- pandas/core/arrays/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index d53a952bc523c..ad0bde3abbdd4 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -2358,7 +2358,7 @@ def take( if isinstance(maybe_slice, slice): freq = self._get_getitem_freq(maybe_slice) - result.freq = freq + result._freq = freq return result