From 5dd91bc07f46146c07ce1869baa94e121272b0ac Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 5 Apr 2023 12:49:35 -0700 Subject: [PATCH 1/5] REF: remove freq arg from PeriodArray constructor --- pandas/core/arrays/datetimelike.py | 7 +- pandas/core/arrays/period.py | 65 ++++++------ pandas/core/dtypes/dtypes.py | 4 +- pandas/core/indexes/period.py | 16 +-- pandas/io/pytables.py | 4 +- .../tests/arrays/period/test_arrow_compat.py | 8 +- .../tests/arrays/period/test_constructors.py | 7 +- pandas/tests/arrays/test_datetimelike.py | 98 +++++++++---------- pandas/tests/arrays/test_period.py | 40 ++++---- pandas/tests/base/test_conversion.py | 2 +- pandas/tests/dtypes/test_generic.py | 5 +- pandas/tests/extension/test_period.py | 12 +-- .../indexes/datetimelike_/test_equals.py | 2 +- .../tests/indexes/period/test_constructors.py | 4 +- pandas/tests/plotting/test_converter.py | 2 +- 15 files changed, 145 insertions(+), 131 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 665c214c1e9a7..1fc1523fb5d32 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1106,7 +1106,8 @@ def _add_period(self, other: Period) -> PeriodArray: from pandas.core.arrays.period import PeriodArray i8vals = np.broadcast_to(other.ordinal, self.shape) - parr = PeriodArray(i8vals, freq=other.freq) + dtype = PeriodDtype(other.freq) + parr = PeriodArray(i8vals, dtype=dtype) return parr + self def _add_offset(self, offset): @@ -1282,9 +1283,7 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> Self: op = getattr(datetimelike_accumulations, name) result = op(self.copy(), skipna=skipna, **kwargs) - return type(self)._simple_new( - result, freq=None, dtype=self.dtype # type: ignore[call-arg] - ) + return type(self)._simple_new(result, dtype=self.dtype) @unpack_zerodim_and_defer("__add__") def __add__(self, other): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 79da8fd3826fe..3707c04b7e387 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -9,6 +9,7 @@ Literal, Sequence, TypeVar, + cast, overload, ) @@ -116,9 +117,7 @@ class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): # type: ignore[misc] """ Pandas ExtensionArray for storing Period data. - Users should use :func:`~pandas.period_array` to create new instances. - Alternatively, :func:`~pandas.array` can be used to create new instances - from a sequence of Period scalars. + Users should use :func:`~pandas.array` to create new instances. Parameters ---------- @@ -210,13 +209,11 @@ def _scalar_type(self) -> type[Period]: # -------------------------------------------------------------------- # Constructors - def __init__( - self, values, dtype: Dtype | None = None, freq=None, copy: bool = False - ) -> None: - freq = validate_dtype_freq(dtype, freq) - - if freq is not None: - freq = Period._maybe_convert_freq(freq) + def __init__(self, values, dtype: Dtype | None = None, copy: bool = False) -> None: + if dtype is not None: + dtype = pandas_dtype(dtype) + if not isinstance(dtype, PeriodDtype): + raise ValueError(f"Invalid dtype {dtype} for PeriodArray") if isinstance(values, ABCSeries): values = values._values @@ -227,36 +224,38 @@ def __init__( values = values._values if isinstance(values, type(self)): - if freq is not None and freq != values.freq: - raise raise_on_incompatible(values, freq) - values, freq = values._ndarray, values.freq + if dtype is not None and dtype != values.dtype: + raise raise_on_incompatible(values, dtype.freq) + values, dtype = values._ndarray, values.dtype values = np.array(values, dtype="int64", copy=copy) - if freq is None: - raise ValueError("freq is not specified and cannot be inferred") - NDArrayBacked.__init__(self, values, PeriodDtype(freq)) + if dtype is None: + raise ValueError("dtype is not specified and cannot be inferred") + dtype = cast(PeriodDtype, dtype) + NDArrayBacked.__init__(self, values, dtype) # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked" @classmethod def _simple_new( # type: ignore[override] cls, values: npt.NDArray[np.int64], - freq: BaseOffset | None = None, - dtype: Dtype | None = None, + dtype: PeriodDtype, ) -> Self: # alias for PeriodArray.__init__ assertion_msg = "Should be numpy array of type i8" assert isinstance(values, np.ndarray) and values.dtype == "i8", assertion_msg - return cls(values, freq=freq, dtype=dtype) + return cls(values, dtype=dtype) @classmethod def _from_sequence( cls, - scalars: Sequence[Period | None] | AnyArrayLike, + scalars, *, dtype: Dtype | None = None, copy: bool = False, ) -> Self: + if dtype is not None: + dtype = pandas_dtype(dtype) if dtype and isinstance(dtype, PeriodDtype): freq = dtype.freq else: @@ -266,16 +265,14 @@ def _from_sequence( validate_dtype_freq(scalars.dtype, freq) if copy: scalars = scalars.copy() - # error: Incompatible return value type - # (got "Union[Sequence[Optional[Period]], Union[Union[ExtensionArray, - # ndarray[Any, Any]], Index, Series]]", expected "PeriodArray") - return scalars # type: ignore[return-value] + return scalars periods = np.asarray(scalars, dtype=object) freq = freq or libperiod.extract_freq(periods) ordinals = libperiod.extract_ordinals(periods, freq) - return cls(ordinals, freq=freq) + dtype = PeriodDtype(freq) + return cls(ordinals, dtype=dtype) @classmethod def _from_sequence_of_strings( @@ -299,7 +296,8 @@ def _from_datetime64(cls, data, freq, tz=None) -> Self: PeriodArray[freq] """ data, freq = dt64arr_to_periodarr(data, freq, tz) - return cls(data, freq=freq) + dtype = PeriodDtype(freq) + return cls(data, dtype=dtype) @classmethod def _generate_range(cls, start, end, periods, freq, fields): @@ -610,7 +608,8 @@ def asfreq(self, freq=None, how: str = "E") -> Self: if self._hasna: new_data[self._isnan] = iNaT - return type(self)(new_data, freq=freq) + dtype = PeriodDtype(freq) + return type(self)(new_data, dtype=dtype) # ------------------------------------------------------------------ # Rendering Methods @@ -697,7 +696,7 @@ def _addsub_int_array_or_scalar( if op is operator.sub: other = -other res_values = algos.checked_add_with_arr(self.asi8, other, arr_mask=self._isnan) - return type(self)(res_values, freq=self.freq) + return type(self)(res_values, dtype=self.dtype) def _add_offset(self, other: BaseOffset): assert not isinstance(other, Tick) @@ -768,7 +767,7 @@ def _add_timedelta_arraylike( self.asi8, delta.view("i8"), arr_mask=self._isnan, b_mask=b_mask ) np.putmask(res_values, self._isnan | b_mask, iNaT) - return type(self)(res_values, freq=self.freq) + return type(self)(res_values, dtype=self.dtype) def _check_timedeltalike_freq_compat(self, other): """ @@ -845,6 +844,7 @@ def period_array( data: Sequence[Period | str | None] | AnyArrayLike, freq: str | Tick | None = None, copy: bool = False, + warn=True, ) -> PeriodArray: """ Construct a new PeriodArray from a sequence of Period scalars. @@ -904,7 +904,12 @@ def period_array( if is_datetime64_dtype(data_dtype): return PeriodArray._from_datetime64(data, freq) if isinstance(data_dtype, PeriodDtype): - return PeriodArray(data, freq=freq) + out = PeriodArray(data) + if freq is not None: + if freq == data_dtype.freq: + return out + return out.asfreq(freq) + return out # other iterable of some kind if not isinstance(data, (np.ndarray, list, tuple, ABCSeries)): diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index ac52e43472cbe..da999a8f204c8 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1013,14 +1013,14 @@ def __from_arrow__( results = [] for arr in chunks: data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=np.dtype(np.int64)) - parr = PeriodArray(data.copy(), freq=self.freq, copy=False) + parr = PeriodArray(data.copy(), dtype=self, copy=False) # error: Invalid index type "ndarray[Any, dtype[bool_]]" for "PeriodArray"; # expected type "Union[int, Sequence[int], Sequence[bool], slice]" parr[~mask] = NaT # type: ignore[index] results.append(parr) if not results: - return PeriodArray(np.array([], dtype="int64"), freq=self.freq, copy=False) + return PeriodArray(np.array([], dtype="int64"), dtype=self, copy=False) return PeriodArray._concat_same_type(results) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b2be354e99213..284b09f8a2e75 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -25,6 +25,7 @@ ) from pandas.core.dtypes.common import is_integer +from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import is_valid_na_for_dtype @@ -52,7 +53,6 @@ npt, ) - from pandas.core.dtypes.dtypes import PeriodDtype _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update({"target_klass": "PeriodIndex or list of Periods"}) @@ -68,7 +68,8 @@ def _new_PeriodIndex(cls, **d): values = d.pop("data") if values.dtype == "int64": freq = d.pop("freq", None) - values = PeriodArray(values, freq=freq) + dtype = PeriodDtype(freq) + values = PeriodArray(values, dtype=dtype) return cls._simple_new(values, **d) else: return cls(values, **d) @@ -246,7 +247,8 @@ def __new__( # empty when really using the range-based constructor. freq = freq2 - data = PeriodArray(data, freq=freq) + dtype = PeriodDtype(freq) + data = PeriodArray(data, dtype=dtype) else: freq = validate_dtype_freq(dtype, freq) @@ -261,10 +263,11 @@ def __new__( if data is None and ordinal is not None: # we strangely ignore `ordinal` if data is passed. ordinal = np.asarray(ordinal, dtype=np.int64) - data = PeriodArray(ordinal, freq=freq) + dtype = PeriodDtype(freq) + data = PeriodArray(ordinal, dtype=dtype) else: # don't pass copy here, since we copy later. - data = period_array(data=data, freq=freq) + data = period_array(data=data, freq=freq, warn=False) if copy: data = data.copy() @@ -535,5 +538,6 @@ def period_range( freq = "D" data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={}) - data = PeriodArray(data, freq=freq) + dtype = PeriodDtype(freq) + data = PeriodArray(data, dtype=dtype) return PeriodIndex(data, name=name) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index e579166c002d3..6e00911a5a44d 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -68,6 +68,7 @@ CategoricalDtype, DatetimeTZDtype, ExtensionDtype, + PeriodDtype, ) from pandas.core.dtypes.missing import array_equivalent @@ -2790,7 +2791,8 @@ def f(values, freq=None, tz=None): elif index_class == PeriodIndex: def f(values, freq=None, tz=None): - parr = PeriodArray._simple_new(values, freq=freq) + dtype = PeriodDtype(freq) + parr = PeriodArray._simple_new(values, dtype=dtype) return PeriodIndex._simple_new(parr, name=None) factory = f diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py index d76b04df4c9d7..903fc3177aa84 100644 --- a/pandas/tests/arrays/period/test_arrow_compat.py +++ b/pandas/tests/arrays/period/test_arrow_compat.py @@ -62,7 +62,7 @@ def test_arrow_array(data, freq): def test_arrow_array_missing(): from pandas.core.arrays.arrow.extension_types import ArrowPeriodType - arr = PeriodArray([1, 2, 3], freq="D") + arr = PeriodArray([1, 2, 3], dtype="period[D]") arr[1] = pd.NaT result = pa.array(arr) @@ -75,7 +75,7 @@ def test_arrow_array_missing(): def test_arrow_table_roundtrip(): from pandas.core.arrays.arrow.extension_types import ArrowPeriodType - arr = PeriodArray([1, 2, 3], freq="D") + arr = PeriodArray([1, 2, 3], dtype="period[D]") arr[1] = pd.NaT df = pd.DataFrame({"a": arr}) @@ -96,7 +96,7 @@ def test_arrow_load_from_zero_chunks(): from pandas.core.arrays.arrow.extension_types import ArrowPeriodType - arr = PeriodArray([], freq="D") + arr = PeriodArray([], dtype="period[D]") df = pd.DataFrame({"a": arr}) table = pa.table(df) @@ -110,7 +110,7 @@ def test_arrow_load_from_zero_chunks(): def test_arrow_table_roundtrip_without_metadata(): - arr = PeriodArray([1, 2, 3], freq="H") + arr = PeriodArray([1, 2, 3], dtype="period[H]") arr[1] = pd.NaT df = pd.DataFrame({"a": arr}) diff --git a/pandas/tests/arrays/period/test_constructors.py b/pandas/tests/arrays/period/test_constructors.py index cf9749058d1d1..11c9fd75fba38 100644 --- a/pandas/tests/arrays/period/test_constructors.py +++ b/pandas/tests/arrays/period/test_constructors.py @@ -75,16 +75,17 @@ def test_period_array_raises(data, freq, msg): def test_period_array_non_period_series_raies(): ser = pd.Series([1, 2, 3]) with pytest.raises(TypeError, match="dtype"): - PeriodArray(ser, freq="D") + PeriodArray(ser, dtype="period[D]") def test_period_array_freq_mismatch(): arr = period_array(["2000", "2001"], freq="D") with pytest.raises(IncompatibleFrequency, match="freq"): - PeriodArray(arr, freq="M") + PeriodArray(arr, dtype="period[M]") + dtype = pd.PeriodDtype(pd.tseries.offsets.MonthEnd()) with pytest.raises(IncompatibleFrequency, match="freq"): - PeriodArray(arr, freq=pd.tseries.offsets.MonthEnd()) + PeriodArray(arr, dtype=dtype) def test_from_sequence_disallows_i8(): diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 2a300b6a724d0..9e402af931199 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -154,9 +154,10 @@ def test_take(self): data = np.arange(100, dtype="i8") * 24 * 3600 * 10**9 np.random.shuffle(data) - freq = None if self.array_cls is not PeriodArray else "D" - - arr = self.array_cls(data, freq=freq) + if self.array_cls is PeriodArray: + arr = PeriodArray(data, dtype="period[D]") + else: + arr = self.array_cls(data) idx = self.index_cls._simple_new(arr) takers = [1, 4, 94] @@ -172,19 +173,15 @@ def test_take(self): tm.assert_index_equal(self.index_cls(result), expected) @pytest.mark.parametrize("fill_value", [2, 2.0, Timestamp(2021, 1, 1, 12).time]) - def test_take_fill_raises(self, fill_value): - data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - - arr = self.array_cls(data, freq="D") - - msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. Got" + def test_take_fill_raises(self, fill_value, arr1d): + msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got" with pytest.raises(TypeError, match=msg): - arr.take([0, 1], allow_fill=True, fill_value=fill_value) + arr1d.take([0, 1], allow_fill=True, fill_value=fill_value) - def test_take_fill(self): - data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 + def test_take_fill(self, arr1d): + np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + arr = arr1d # self.array_cls(data, freq="D") result = arr.take([-1, 1], allow_fill=True, fill_value=None) assert result[0] is NaT @@ -217,46 +214,39 @@ def test_concat_same_type(self, arr1d): tm.assert_index_equal(self.index_cls(result), expected) - def test_unbox_scalar(self): - data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") - result = arr._unbox_scalar(arr[0]) - expected = arr._ndarray.dtype.type + def test_unbox_scalar(self, arr1d): + result = arr1d._unbox_scalar(arr1d[0]) + expected = arr1d._ndarray.dtype.type assert isinstance(result, expected) - result = arr._unbox_scalar(NaT) + result = arr1d._unbox_scalar(NaT) assert isinstance(result, expected) msg = f"'value' should be a {self.scalar_type.__name__}." with pytest.raises(ValueError, match=msg): - arr._unbox_scalar("foo") - - def test_check_compatible_with(self): - data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + arr1d._unbox_scalar("foo") - arr._check_compatible_with(arr[0]) - arr._check_compatible_with(arr[:1]) - arr._check_compatible_with(NaT) - - def test_scalar_from_string(self): - data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") - result = arr._scalar_from_string(str(arr[0])) - assert result == arr[0] + def test_check_compatible_with(self, arr1d): + arr1d._check_compatible_with(arr1d[0]) + arr1d._check_compatible_with(arr1d[:1]) + arr1d._check_compatible_with(NaT) - def test_reduce_invalid(self): - data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + def test_scalar_from_string(self, arr1d): + result = arr1d._scalar_from_string(str(arr1d[0])) + assert result == arr1d[0] + def test_reduce_invalid(self, arr1d): msg = "does not support reduction 'not a method'" with pytest.raises(TypeError, match=msg): - arr._reduce("not a method") + arr1d._reduce("not a method") @pytest.mark.parametrize("method", ["pad", "backfill"]) def test_fillna_method_doesnt_change_orig(self, method): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + if self.array_cls is PeriodArray: + arr = self.array_cls(data, dtype="period[D]") + else: + arr = self.array_cls(data) arr[4] = NaT fill_value = arr[3] if method == "pad" else arr[5] @@ -269,7 +259,10 @@ def test_fillna_method_doesnt_change_orig(self, method): def test_searchsorted(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + if self.array_cls is PeriodArray: + arr = self.array_cls(data, dtype="period[D]") + else: + arr = self.array_cls(data) # scalar result = arr.searchsorted(arr[1]) @@ -340,7 +333,10 @@ def test_getitem_near_implementation_bounds(self): # We only check tz-naive for DTA bc the bounds are slightly different # for other tzs i8vals = np.asarray([NaT._value + n for n in range(1, 5)], dtype="i8") - arr = self.array_cls(i8vals, freq="ns") + if self.array_cls is PeriodArray: + arr = self.array_cls(i8vals, dtype="period[ns]") + else: + arr = self.array_cls(i8vals, freq="ns") arr[0] # should not raise OutOfBoundsDatetime index = pd.Index(arr) @@ -407,7 +403,10 @@ def test_repr_2d(self, arr1d): def test_setitem(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + if self.array_cls is PeriodArray: + arr = self.array_cls(data, dtype="period[D]") + else: + arr = self.array_cls(data, freq="D") arr[0] = arr[1] expected = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 @@ -482,9 +481,8 @@ def test_setitem_categorical(self, arr1d, as_index): tm.assert_equal(arr1d, expected) - def test_setitem_raises(self): - data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + def test_setitem_raises(self, arr1d): + arr = arr1d[:10] val = arr[0] with pytest.raises(IndexError, match="index 12 is out of bounds"): @@ -520,7 +518,10 @@ def test_setitem_numeric_raises(self, arr1d, box): def test_inplace_arithmetic(self): # GH#24115 check that iadd and isub are actually in-place data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + if self.array_cls is PeriodArray: + arr = self.array_cls(data, dtype="period[D]") + else: + arr = self.array_cls(data, freq="D") expected = arr + pd.Timedelta(days=1) arr += pd.Timedelta(days=1) @@ -530,13 +531,10 @@ def test_inplace_arithmetic(self): arr -= pd.Timedelta(days=1) tm.assert_equal(arr, expected) - def test_shift_fill_int_deprecated(self): + def test_shift_fill_int_deprecated(self, arr1d): # GH#31971, enforced in 2.0 - data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") - with pytest.raises(TypeError, match="value should be a"): - arr.shift(1, fill_value=1) + arr1d.shift(1, fill_value=1) def test_median(self, arr1d): arr = arr1d diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index a4b442ff526e9..d1e954bc2ebe2 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -9,10 +9,7 @@ import pandas as pd import pandas._testing as tm -from pandas.core.arrays import ( - PeriodArray, - period_array, -) +from pandas.core.arrays import PeriodArray # ---------------------------------------------------------------------------- # Dtype @@ -30,13 +27,13 @@ def test_registered(): def test_asi8(): - result = period_array(["2000", "2001", None], freq="D").asi8 + result = PeriodArray._from_sequence(["2000", "2001", None], dtype="period[D]").asi8 expected = np.array([10957, 11323, iNaT]) tm.assert_numpy_array_equal(result, expected) def test_take_raises(): - arr = period_array(["2000", "2001"], freq="D") + arr = PeriodArray._from_sequence(["2000", "2001"], dtype="period[D]") with pytest.raises(IncompatibleFrequency, match="freq"): arr.take([0, -1], allow_fill=True, fill_value=pd.Period("2000", freq="W")) @@ -46,13 +43,13 @@ def test_take_raises(): def test_fillna_raises(): - arr = period_array(["2000", "2001", "2002"], freq="D") + arr = PeriodArray._from_sequence(["2000", "2001", "2002"], dtype="period[D]") with pytest.raises(ValueError, match="Length"): arr.fillna(arr[:2]) def test_fillna_copies(): - arr = period_array(["2000", "2001", "2002"], freq="D") + arr = PeriodArray._from_sequence(["2000", "2001", "2002"], dtype="period[D]") result = arr.fillna(pd.Period("2000", "D")) assert result is not arr @@ -76,30 +73,30 @@ def test_fillna_copies(): ], ) def test_setitem(key, value, expected): - arr = PeriodArray(np.arange(3), freq="D") - expected = PeriodArray(expected, freq="D") + arr = PeriodArray(np.arange(3), dtype="period[D]") + expected = PeriodArray(expected, dtype="period[D]") arr[key] = value tm.assert_period_array_equal(arr, expected) def test_setitem_raises_incompatible_freq(): - arr = PeriodArray(np.arange(3), freq="D") + arr = PeriodArray(np.arange(3), dtype="period[D]") with pytest.raises(IncompatibleFrequency, match="freq"): arr[0] = pd.Period("2000", freq="A") - other = period_array(["2000", "2001"], freq="A") + other = PeriodArray._from_sequence(["2000", "2001"], dtype="period[A]") with pytest.raises(IncompatibleFrequency, match="freq"): arr[[0, 1]] = other def test_setitem_raises_length(): - arr = PeriodArray(np.arange(3), freq="D") + arr = PeriodArray(np.arange(3), dtype="period[D]") with pytest.raises(ValueError, match="length"): arr[[0, 1]] = [pd.Period("2000", freq="D")] def test_setitem_raises_type(): - arr = PeriodArray(np.arange(3), freq="D") + arr = PeriodArray(np.arange(3), dtype="period[D]") with pytest.raises(TypeError, match="int"): arr[0] = 1 @@ -109,7 +106,7 @@ def test_setitem_raises_type(): def test_sub_period(): - arr = period_array(["2000", "2001"], freq="D") + arr = PeriodArray._from_sequence(["2000", "2001"], dtype="period[D]") other = pd.Period("2000", freq="M") with pytest.raises(IncompatibleFrequency, match="freq"): arr - other @@ -135,11 +132,16 @@ def test_sub_period_overflow(): @pytest.mark.parametrize( "other", - [pd.Period("2000", freq="H"), period_array(["2000", "2001", "2000"], freq="H")], + [ + pd.Period("2000", freq="H"), + PeriodArray._from_sequence(["2000", "2001", "2000"], dtype="period[H]"), + ], ) def test_where_different_freq_raises(other): # GH#45768 The PeriodArray method raises, the Series method coerces - ser = pd.Series(period_array(["2000", "2001", "2002"], freq="D")) + ser = pd.Series( + PeriodArray._from_sequence(["2000", "2001", "2002"], dtype="period[D]") + ) cond = np.array([True, False, True]) with pytest.raises(IncompatibleFrequency, match="freq"): @@ -155,7 +157,7 @@ def test_where_different_freq_raises(other): def test_repr_small(): - arr = period_array(["2000", "2001"], freq="D") + arr = PeriodArray._from_sequence(["2000", "2001"], dtype="period[D]") result = str(arr) expected = ( "\n['2000-01-01', '2001-01-01']\nLength: 2, dtype: period[D]" @@ -164,7 +166,7 @@ def test_repr_small(): def test_repr_large(): - arr = period_array(["2000", "2001"] * 500, freq="D") + arr = PeriodArray._from_sequence(["2000", "2001"] * 500, dtype="period[D]") result = str(arr) expected = ( "\n" diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index a910b20c476ff..6924c3920245c 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -245,7 +245,7 @@ def test_numpy_array_all_dtypes(any_numpy_dtype): "arr, attr", [ (pd.Categorical(["a", "b"]), "_codes"), - (pd.core.arrays.period_array(["2000", "2001"], freq="D"), "_ndarray"), + (PeriodArray._from_sequence(["2000", "2001"], dtype="period[D]"), "_ndarray"), (pd.array([0, np.nan], dtype="Int64"), "_data"), (IntervalArray.from_breaks([0, 1]), "_left"), (SparseArray([0, 1]), "_sparse_values"), diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 6459942c99190..9a5bd5b1d047b 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -29,7 +29,10 @@ class TestABCClasses: ("ABCRangeIndex", pd.RangeIndex(3)), ("ABCTimedeltaIndex", timedelta_index), ("ABCIntervalIndex", pd.interval_range(start=0, end=3)), - ("ABCPeriodArray", pd.arrays.PeriodArray([2000, 2001, 2002], freq="D")), + ( + "ABCPeriodArray", + pd.arrays.PeriodArray([2000, 2001, 2002], dtype="period[D]"), + ), ("ABCPandasArray", pd.arrays.PandasArray(np.array([0, 1, 2]))), ("ABCPeriodIndex", period_index), ("ABCCategoricalIndex", categorical_df.index), diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 0c961a8563a6e..6ddd1dff92f01 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -35,27 +35,27 @@ def dtype(request): @pytest.fixture def data(dtype): - return PeriodArray(np.arange(1970, 2070), freq=dtype.freq) + return PeriodArray(np.arange(1970, 2070), dtype=dtype) @pytest.fixture def data_for_twos(dtype): - return PeriodArray(np.ones(100) * 2, freq=dtype.freq) + return PeriodArray(np.ones(100) * 2, dtype=dtype) @pytest.fixture def data_for_sorting(dtype): - return PeriodArray([2018, 2019, 2017], freq=dtype.freq) + return PeriodArray([2018, 2019, 2017], dtype=dtype) @pytest.fixture def data_missing(dtype): - return PeriodArray([iNaT, 2017], freq=dtype.freq) + return PeriodArray([iNaT, 2017], dtype=dtype) @pytest.fixture def data_missing_for_sorting(dtype): - return PeriodArray([2018, iNaT, 2017], freq=dtype.freq) + return PeriodArray([2018, iNaT, 2017], dtype=dtype) @pytest.fixture @@ -64,7 +64,7 @@ def data_for_grouping(dtype): NA = iNaT A = 2017 C = 2019 - return PeriodArray([B, B, NA, NA, A, A, B, C], freq=dtype.freq) + return PeriodArray([B, B, NA, NA, A, A, B, C], dtype=dtype) @pytest.fixture diff --git a/pandas/tests/indexes/datetimelike_/test_equals.py b/pandas/tests/indexes/datetimelike_/test_equals.py index f59963ec3effc..d85d7103fe381 100644 --- a/pandas/tests/indexes/datetimelike_/test_equals.py +++ b/pandas/tests/indexes/datetimelike_/test_equals.py @@ -75,7 +75,7 @@ def test_equals2(self, freq): # same internal, different tz idx3 = PeriodIndex._simple_new( - idx._values._simple_new(idx._values.asi8, freq="H") + idx._values._simple_new(idx._values.asi8, dtype=pd.PeriodDtype("H")) ) tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) assert not idx.equals(idx3) diff --git a/pandas/tests/indexes/period/test_constructors.py b/pandas/tests/indexes/period/test_constructors.py index 60e50a757a271..5593ed018eb0f 100644 --- a/pandas/tests/indexes/period/test_constructors.py +++ b/pandas/tests/indexes/period/test_constructors.py @@ -330,9 +330,9 @@ def test_constructor_simple_new(self): msg = "Should be numpy array of type i8" with pytest.raises(AssertionError, match=msg): # Need ndarray, not int64 Index - type(idx._data)._simple_new(Index(idx.asi8), freq=idx.freq) + type(idx._data)._simple_new(Index(idx.asi8), dtype=idx.dtype) - arr = type(idx._data)._simple_new(idx.asi8, freq=idx.freq) + arr = type(idx._data)._simple_new(idx.asi8, dtype=idx.dtype) result = idx._simple_new(arr, name="p") tm.assert_index_equal(result, idx) diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index 8ab15abeca7fd..834e6fac11283 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -392,7 +392,7 @@ def test_quarterly_finder(year_span): (min_anndef, maj_anndef) = converter._get_default_annual_spacing(nyears) result = converter._quarterly_finder(vmin, vmax, "Q") quarters = PeriodIndex( - arrays.PeriodArray(np.array([x[0] for x in result]), freq="Q") + arrays.PeriodArray(np.array([x[0] for x in result]), dtype="period[Q]") ) majors = np.array([x[1] for x in result]) minors = np.array([x[2] for x in result]) From 061b96d544ac665829722b6141ff197d525cb27a Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 5 Apr 2023 13:33:32 -0700 Subject: [PATCH 2/5] remove unused arg --- pandas/core/arrays/period.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 3707c04b7e387..9af4e7f7d9f58 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -844,7 +844,6 @@ def period_array( data: Sequence[Period | str | None] | AnyArrayLike, freq: str | Tick | None = None, copy: bool = False, - warn=True, ) -> PeriodArray: """ Construct a new PeriodArray from a sequence of Period scalars. From 9c4c88e2b46931d95069b792f3d153d0cfbc5904 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 5 Apr 2023 15:04:12 -0700 Subject: [PATCH 3/5] remove warn --- pandas/core/indexes/period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 284b09f8a2e75..2a087612245be 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -267,7 +267,7 @@ def __new__( data = PeriodArray(ordinal, dtype=dtype) else: # don't pass copy here, since we copy later. - data = period_array(data=data, freq=freq, warn=False) + data = period_array(data=data, freq=freq) if copy: data = data.copy() From c9c33e2c2cf195beb1d528df94cc1bcc813104de Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 6 Apr 2023 10:13:49 -0700 Subject: [PATCH 4/5] deprecate freq instead --- doc/source/whatsnew/v2.1.0.rst | 3 ++- pandas/core/arrays/period.py | 17 ++++++++++++++++- pandas/tests/arrays/period/test_constructors.py | 11 +++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 107b22953ff79..6e78990724064 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -173,7 +173,8 @@ Deprecations - Deprecated the methods :meth:`Series.bool` and :meth:`DataFrame.bool` (:issue:`51749`) - Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`) - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`) - +- Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:52462`) +- .. --------------------------------------------------------------------------- .. _whatsnew_210.performance: diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 9af4e7f7d9f58..03d7efcbb1685 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -12,6 +12,7 @@ cast, overload, ) +import warnings import numpy as np @@ -50,6 +51,7 @@ cache_readonly, doc, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( ensure_object, @@ -209,7 +211,20 @@ def _scalar_type(self) -> type[Period]: # -------------------------------------------------------------------- # Constructors - def __init__(self, values, dtype: Dtype | None = None, copy: bool = False) -> None: + def __init__( + self, values, dtype: Dtype | None = None, freq=None, copy: bool = False + ) -> None: + if freq is not None: + # GH#52462 + warnings.warn( + "The 'freq' keyword in the PeriodArray constructor is deprecated " + "and will be removed in a future version. Pass 'dtype' instead", + FutureWarning, + stacklevel=find_stack_level(), + ) + freq = validate_dtype_freq(dtype, freq) + dtype = PeriodDtype(freq) + if dtype is not None: dtype = pandas_dtype(dtype) if not isinstance(dtype, PeriodDtype): diff --git a/pandas/tests/arrays/period/test_constructors.py b/pandas/tests/arrays/period/test_constructors.py index 11c9fd75fba38..8c3c2bd095adf 100644 --- a/pandas/tests/arrays/period/test_constructors.py +++ b/pandas/tests/arrays/period/test_constructors.py @@ -122,3 +122,14 @@ def test_from_td64nat_sequence_raises(): pd.Series(arr, dtype=dtype) with pytest.raises(ValueError, match=msg): pd.DataFrame(arr, dtype=dtype) + + +def test_freq_deprecated(): + # GH#52462 + data = np.arange(5).astype(np.int64) + msg = "The 'freq' keyword in the PeriodArray constructor is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = PeriodArray(data, freq="M") + + expected = PeriodArray(data, dtype="period[M]") + tm.assert_equal(res, expected) From f97022e3035073ff9618f1f7cb6097191b4185c2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 6 Apr 2023 19:20:04 -0700 Subject: [PATCH 5/5] Update doc/source/whatsnew/v2.1.0.rst --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 6e78990724064..e6e5c8a5d90d0 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -173,7 +173,7 @@ Deprecations - Deprecated the methods :meth:`Series.bool` and :meth:`DataFrame.bool` (:issue:`51749`) - Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`) - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`) -- Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:52462`) +- Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`) - .. ---------------------------------------------------------------------------