From 4919878efafdc8c8ceac4108fb028c92679ffa30 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 13:45:04 -0800 Subject: [PATCH 1/4] DEPR: PeriodIndex ordinal, fields keywords --- doc/source/whatsnew/v2.2.0.rst | 2 + pandas/core/arrays/period.py | 15 ++--- pandas/core/indexes/period.py | 64 ++++++++++++++++--- pandas/io/pytables.py | 6 +- .../period/methods/test_to_timestamp.py | 2 +- .../tests/indexes/period/test_constructors.py | 47 ++++++++++---- pandas/tests/indexes/period/test_period.py | 13 +++- pandas/tests/indexes/test_old_base.py | 4 +- 8 files changed, 119 insertions(+), 34 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 9040dba238c88..48aee18c90456 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -264,6 +264,7 @@ Other Deprecations - Changed :meth:`Timedelta.resolution_string` to return ``h``, ``min``, ``s``, ``ms``, ``us``, and ``ns`` instead of ``H``, ``T``, ``S``, ``L``, ``U``, and ``N``, for compatibility with respective deprecations in frequency aliases (:issue:`52536`) - Deprecated :func:`read_gbq` and :meth:`DataFrame.to_gbq`. Use ``pandas_gbq.read_gbq`` and ``pandas_gbq.to_gbq`` instead https://pandas-gbq.readthedocs.io/en/latest/api.html (:issue:`55525`) - Deprecated :meth:`Index.format`, use ``index.astype(str)`` or ``index.map(formatter)`` instead (:issue:`55413`) +- Deprecated ``year``, ``month``, ``quarter``, ``day``, ``hour``, ``minute``, and ``second`` keywords in the :class:`PeriodIndex` constructor, use :meth:`PeriodIndex.from_fields` instead (:issue:`55960`) - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_clipboard`. (:issue:`54229`) - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_csv` except ``path_or_buf``. (:issue:`54229`) - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_dict`. (:issue:`54229`) @@ -294,6 +295,7 @@ Other Deprecations - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`) - Deprecated the ``errors="ignore"`` option in :func:`to_datetime`, :func:`to_timedelta`, and :func:`to_numeric`; explicitly catch exceptions instead (:issue:`54467`) - Deprecated the ``fastpath`` keyword in the :class:`Series` constructor (:issue:`20110`) +- Deprecated the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`) - Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`) - Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`) - Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 25659d8738057..a147bc249c450 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -329,26 +329,25 @@ def _from_datetime64(cls, data, freq, tz=None) -> Self: return cls(data, dtype=dtype) @classmethod - def _generate_range(cls, start, end, periods, freq, fields): + def _generate_range(cls, start, end, periods, freq) -> Self: periods = dtl.validate_periods(periods) if freq is not None: freq = Period._maybe_convert_freq(freq) - field_count = len(fields) if start is not None or end is not None: - if field_count > 0: - raise ValueError( - "Can either instantiate from fields or endpoints, but not both" - ) subarr, freq = _get_ordinal_range(start, end, periods, freq) - elif field_count > 0: - subarr, freq = _range_from_fields(freq=freq, **fields) else: raise ValueError("Not enough parameters to construct Period range") return subarr, freq + @classmethod + def _from_fields(cls, *, fields: dict, freq) -> Self: + subarr, freq = _range_from_fields(freq=freq, **fields) + dtype = PeriodDtype(freq) + return cls._simple_new(subarr, dtype=dtype) + # ----------------------------------------------------------------- # DatetimeLike Interface diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index faf058eff4bf4..a228fb6d71419 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -5,6 +5,7 @@ timedelta, ) from typing import TYPE_CHECKING +import warnings import numpy as np @@ -21,6 +22,7 @@ cache_readonly, doc, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import is_integer from pandas.core.dtypes.dtypes import PeriodDtype @@ -233,6 +235,24 @@ def __new__( if not set(fields).issubset(valid_field_set): argument = next(iter(set(fields) - valid_field_set)) raise TypeError(f"__new__() got an unexpected keyword argument {argument}") + elif len(fields): + # GH#55960 + warnings.warn( + "Constructing PeriodIndex from fields is deprecated. Use " + "PeriodIndex.from_fields instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if ordinal is not None: + # GH#55960 + warnings.warn( + "The 'ordinal' keyword in PeriodIndex is deprecated and will " + "be removed in a future version. Use PeriodIndex.from_ordinals " + "instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) name = maybe_extract_name(name, data, cls) @@ -241,14 +261,9 @@ def __new__( if not fields: # test_pickle_compat_construction cls._raise_scalar_data_error(None) + data = cls.from_fields(**fields, freq=freq)._data + copy = False - data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields) - # PeriodArray._generate range does validation that fields is - # empty when really using the range-based constructor. - freq = freq2 - - dtype = PeriodDtype(freq) - data = PeriodArray(data, dtype=dtype) elif fields: if data is not None: raise ValueError("Cannot pass both data and fields") @@ -280,6 +295,39 @@ def __new__( return cls._simple_new(data, name=name, refs=refs) + @classmethod + def from_fields( + cls, + *, + year=None, + quarter=None, + month=None, + day=None, + hour=None, + minute=None, + second=None, + freq=None, + ) -> Self: + fields = { + "year": year, + "quarter": quarter, + "month": month, + "day": day, + "hour": hour, + "minute": minute, + "second": second, + } + fields = {key: fields[key] for key in fields if fields[key] is not None} + arr = PeriodArray._from_fields(fields=fields, freq=freq) + return cls._simple_new(arr) + + @classmethod + def from_ordinals(cls, ordinals, *, freq, name=None) -> Self: + ordinals = np.asarray(ordinals, dtype=np.int64) + dtype = PeriodDtype(freq) + data = PeriodArray._simple_new(ordinals, dtype=dtype) + return cls._simple_new(data, name=name) + # ------------------------------------------------------------------------ # Data @@ -537,7 +585,7 @@ def period_range( if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)): freq = "D" - data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={}) + data, freq = PeriodArray._generate_range(start, end, periods, freq) dtype = PeriodDtype(freq) data = PeriodArray(data, dtype=dtype) return PeriodIndex(data, name=name) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 3659e7247495d..9e0e3686e4aa2 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2169,8 +2169,10 @@ def convert( # error: Incompatible types in assignment (expression has type # "Callable[[Any, KwArg(Any)], PeriodIndex]", variable has type # "Union[Type[Index], Type[DatetimeIndex]]") - factory = lambda x, **kwds: PeriodIndex( # type: ignore[assignment] - ordinal=x, **kwds + factory = lambda x, **kwds: PeriodIndex.from_ordinals( # type: ignore[assignment] + x, freq=kwds.get("freq", None) + )._rename( + kwds["name"] ) # making an Index instance could throw a number of different errors diff --git a/pandas/tests/indexes/period/methods/test_to_timestamp.py b/pandas/tests/indexes/period/methods/test_to_timestamp.py index 977ad8b26a369..7be2602135578 100644 --- a/pandas/tests/indexes/period/methods/test_to_timestamp.py +++ b/pandas/tests/indexes/period/methods/test_to_timestamp.py @@ -87,7 +87,7 @@ def test_to_timestamp_quarterly_bug(self): years = np.arange(1960, 2000).repeat(4) quarters = np.tile(list(range(1, 5)), 40) - pindex = PeriodIndex(year=years, quarter=quarters) + pindex = PeriodIndex.from_fields(year=years, quarter=quarters) stamps = pindex.to_timestamp("D", "end") expected = DatetimeIndex([x.to_timestamp("D", "end") for x in pindex]) diff --git a/pandas/tests/indexes/period/test_constructors.py b/pandas/tests/indexes/period/test_constructors.py index 79837af79c189..a1923d29d3d0e 100644 --- a/pandas/tests/indexes/period/test_constructors.py +++ b/pandas/tests/indexes/period/test_constructors.py @@ -23,18 +23,23 @@ class TestPeriodIndex: def test_keyword_mismatch(self): # GH#55961 we should get exactly one of data/ordinals/**fields per = Period("2016-01-01", "D") + depr_msg1 = "The 'ordinal' keyword in PeriodIndex is deprecated" + depr_msg2 = "Constructing PeriodIndex from fields is deprecated" err_msg1 = "Cannot pass both data and ordinal" with pytest.raises(ValueError, match=err_msg1): - PeriodIndex(data=[per], ordinal=[per.ordinal], freq=per.freq) + with tm.assert_produces_warning(FutureWarning, match=depr_msg1): + PeriodIndex(data=[per], ordinal=[per.ordinal], freq=per.freq) err_msg2 = "Cannot pass both data and fields" with pytest.raises(ValueError, match=err_msg2): - PeriodIndex(data=[per], year=[per.year], freq=per.freq) + with tm.assert_produces_warning(FutureWarning, match=depr_msg2): + PeriodIndex(data=[per], year=[per.year], freq=per.freq) err_msg3 = "Cannot pass both ordinal and fields" with pytest.raises(ValueError, match=err_msg3): - PeriodIndex(ordinal=[per.ordinal], year=[per.year], freq=per.freq) + with tm.assert_produces_warning(FutureWarning, match=depr_msg2): + PeriodIndex(ordinal=[per.ordinal], year=[per.year], freq=per.freq) def test_construction_base_constructor(self): # GH 13664 @@ -94,14 +99,18 @@ def test_constructor_field_arrays(self): years = np.arange(1990, 2010).repeat(4)[2:-2] quarters = np.tile(np.arange(1, 5), 20)[2:-2] - index = PeriodIndex(year=years, quarter=quarters, freq="Q-DEC") + depr_msg = "Constructing PeriodIndex from fields is deprecated" + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + index = PeriodIndex(year=years, quarter=quarters, freq="Q-DEC") expected = period_range("1990Q3", "2009Q2", freq="Q-DEC") tm.assert_index_equal(index, expected) - index2 = PeriodIndex(year=years, quarter=quarters, freq="2Q-DEC") + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + index2 = PeriodIndex(year=years, quarter=quarters, freq="2Q-DEC") tm.assert_numpy_array_equal(index.asi8, index2.asi8) - index = PeriodIndex(year=years, quarter=quarters) + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + index = PeriodIndex(year=years, quarter=quarters) tm.assert_index_equal(index, expected) years = [2007, 2007, 2007] @@ -109,13 +118,16 @@ def test_constructor_field_arrays(self): msg = "Mismatched Period array lengths" with pytest.raises(ValueError, match=msg): - PeriodIndex(year=years, month=months, freq="M") + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + PeriodIndex(year=years, month=months, freq="M") with pytest.raises(ValueError, match=msg): - PeriodIndex(year=years, month=months, freq="2M") + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + PeriodIndex(year=years, month=months, freq="2M") years = [2007, 2007, 2007] months = [1, 2, 3] - idx = PeriodIndex(year=years, month=months, freq="M") + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + idx = PeriodIndex(year=years, month=months, freq="M") exp = period_range("2007-01", periods=3, freq="M") tm.assert_index_equal(idx, exp) @@ -145,15 +157,24 @@ def test_constructor_arrays_negative_year(self): years = np.arange(1960, 2000, dtype=np.int64).repeat(4) quarters = np.tile(np.array([1, 2, 3, 4], dtype=np.int64), 40) - pindex = PeriodIndex(year=years, quarter=quarters) + msg = "Constructing PeriodIndex from fields is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + pindex = PeriodIndex(year=years, quarter=quarters) tm.assert_index_equal(pindex.year, Index(years)) tm.assert_index_equal(pindex.quarter, Index(quarters)) + alt = PeriodIndex.from_fields(year=years, quarter=quarters) + tm.assert_index_equal(alt, pindex) + def test_constructor_invalid_quarters(self): + depr_msg = "Constructing PeriodIndex from fields is deprecated" msg = "Quarter must be 1 <= q <= 4" with pytest.raises(ValueError, match=msg): - PeriodIndex(year=range(2000, 2004), quarter=list(range(4)), freq="Q-DEC") + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + PeriodIndex( + year=range(2000, 2004), quarter=list(range(4)), freq="Q-DEC" + ) def test_constructor_corner(self): result = period_range("2007-01", periods=10.5, freq="M") @@ -394,7 +415,9 @@ def test_constructor_nat(self): def test_constructor_year_and_quarter(self): year = Series([2001, 2002, 2003]) quarter = year - 2000 - idx = PeriodIndex(year=year, quarter=quarter) + msg = "Constructing PeriodIndex from fields is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + idx = PeriodIndex(year=year, quarter=quarter) strs = [f"{t[0]:d}Q{t[1]:d}" for t in zip(quarter, year)] lops = list(map(Period, strs)) p = PeriodIndex(lops) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 6965aaf19f8fb..c30535a012012 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -214,10 +214,19 @@ def test_negative_ordinals(self): Period(ordinal=-1000, freq="Y") Period(ordinal=0, freq="Y") - idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq="Y") - idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq="Y") + msg = "The 'ordinal' keyword in PeriodIndex is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq="Y") + with tm.assert_produces_warning(FutureWarning, match=msg): + idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq="Y") tm.assert_index_equal(idx1, idx2) + alt1 = PeriodIndex.from_ordinals([-1, 0, 1], freq="Y") + tm.assert_index_equal(alt1, idx1) + + alt2 = PeriodIndex.from_ordinals(np.array([-1, 0, 1]), freq="Y") + tm.assert_index_equal(alt2, idx2) + def test_pindex_fieldaccessor_nat(self): idx = PeriodIndex( ["2011-01", "2011-02", "NaT", "2012-03", "2012-04"], freq="D", name="name" diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 30e3f5cee05b4..f08de8e65451c 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -272,7 +272,9 @@ def test_ensure_copied_data(self, index): if isinstance(index, PeriodIndex): # .values an object array of Period, thus copied - result = index_type(ordinal=index.asi8, copy=False, **init_kwargs) + depr_msg = "The 'ordinal' keyword in PeriodIndex is deprecated" + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + result = index_type(ordinal=index.asi8, copy=False, **init_kwargs) tm.assert_numpy_array_equal(index.asi8, result.asi8, check_same="same") elif isinstance(index, IntervalIndex): # checked in test_interval.py From c3b4c95ed074e0b1f983de11b900c89503b27ba7 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 16:03:31 -0800 Subject: [PATCH 2/4] lint fixups --- pandas/core/arrays/period.py | 2 +- pandas/core/indexes/period.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index a147bc249c450..57b244e8d02e9 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -329,7 +329,7 @@ def _from_datetime64(cls, data, freq, tz=None) -> Self: return cls(data, dtype=dtype) @classmethod - def _generate_range(cls, start, end, periods, freq) -> Self: + def _generate_range(cls, start, end, periods, freq): periods = dtl.validate_periods(periods) if freq is not None: diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index a228fb6d71419..40c68d605b13b 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -317,7 +317,7 @@ def from_fields( "minute": minute, "second": second, } - fields = {key: fields[key] for key in fields if fields[key] is not None} + fields = {key: value for key, value in fields.items() if value is not None} arr = PeriodArray._from_fields(fields=fields, freq=freq) return cls._simple_new(arr) From 2f349c2f2f32c5a73d107d66d48432d201d4d108 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 18:36:54 -0800 Subject: [PATCH 3/4] update doctest --- pandas/core/indexes/period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 40c68d605b13b..fcad47c7a260e 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -148,7 +148,7 @@ class PeriodIndex(DatetimeIndexOpsMixin): Examples -------- - >>> idx = pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3]) + >>> idx = pd.PeriodIndex.from_fields(year=[2000, 2002], quarter=[1, 3]) >>> idx PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]') """ From f362f0db2f99cf512120d3d60331fbea30015e4b Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 15 Nov 2023 10:25:10 -0800 Subject: [PATCH 4/4] doc updates --- doc/source/reference/indexing.rst | 2 ++ pandas/core/indexes/period.py | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst index 25e5b3b46b4f3..fa6105761df0a 100644 --- a/doc/source/reference/indexing.rst +++ b/doc/source/reference/indexing.rst @@ -489,3 +489,5 @@ Methods PeriodIndex.asfreq PeriodIndex.strftime PeriodIndex.to_timestamp + PeriodIndex.from_fields + PeriodIndex.from_ordinals diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index fcad47c7a260e..b2f1933800fd3 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -99,12 +99,33 @@ class PeriodIndex(DatetimeIndexOpsMixin): freq : str or period object, optional One of pandas period strings or corresponding objects. year : int, array, or Series, default None + + .. deprecated:: 2.2.0 + Use PeriodIndex.from_fields instead. month : int, array, or Series, default None + + .. deprecated:: 2.2.0 + Use PeriodIndex.from_fields instead. quarter : int, array, or Series, default None + + .. deprecated:: 2.2.0 + Use PeriodIndex.from_fields instead. day : int, array, or Series, default None + + .. deprecated:: 2.2.0 + Use PeriodIndex.from_fields instead. hour : int, array, or Series, default None + + .. deprecated:: 2.2.0 + Use PeriodIndex.from_fields instead. minute : int, array, or Series, default None + + .. deprecated:: 2.2.0 + Use PeriodIndex.from_fields instead. second : int, array, or Series, default None + + .. deprecated:: 2.2.0 + Use PeriodIndex.from_fields instead. dtype : str or PeriodDtype, default None Attributes @@ -137,6 +158,8 @@ class PeriodIndex(DatetimeIndexOpsMixin): asfreq strftime to_timestamp + from_fields + from_ordinals See Also --------