Skip to content

REF: remove freq arg from PeriodArray constructor #52462

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 7, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,8 @@ Deprecations
- Deprecated the methods :meth:`Series.bool` and :meth:`DataFrame.bool` (:issue:`51749`)
- Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`)
- Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`)

- Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_210.performance:
Expand Down
7 changes: 3 additions & 4 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1106,7 +1106,8 @@ def _add_period(self, other: Period) -> PeriodArray:
from pandas.core.arrays.period import PeriodArray

i8vals = np.broadcast_to(other.ordinal, self.shape)
parr = PeriodArray(i8vals, freq=other.freq)
dtype = PeriodDtype(other.freq)
parr = PeriodArray(i8vals, dtype=dtype)
return parr + self

def _add_offset(self, offset):
Expand Down Expand Up @@ -1282,9 +1283,7 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> Self:
op = getattr(datetimelike_accumulations, name)
result = op(self.copy(), skipna=skipna, **kwargs)

return type(self)._simple_new(
result, freq=None, dtype=self.dtype # type: ignore[call-arg]
)
return type(self)._simple_new(result, dtype=self.dtype)

@unpack_zerodim_and_defer("__add__")
def __add__(self, other):
Expand Down
71 changes: 45 additions & 26 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
Literal,
Sequence,
TypeVar,
cast,
overload,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -49,6 +51,7 @@
cache_readonly,
doc,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
ensure_object,
Expand Down Expand Up @@ -116,9 +119,7 @@ class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): # type: ignore[misc]
"""
Pandas ExtensionArray for storing Period data.

Users should use :func:`~pandas.period_array` to create new instances.
Alternatively, :func:`~pandas.array` can be used to create new instances
from a sequence of Period scalars.
Users should use :func:`~pandas.array` to create new instances.

Parameters
----------
Expand Down Expand Up @@ -213,10 +214,21 @@ def _scalar_type(self) -> type[Period]:
def __init__(
self, values, dtype: Dtype | None = None, freq=None, copy: bool = False
) -> None:
freq = validate_dtype_freq(dtype, freq)

if freq is not None:
freq = Period._maybe_convert_freq(freq)
# GH#52462
warnings.warn(
"The 'freq' keyword in the PeriodArray constructor is deprecated "
"and will be removed in a future version. Pass 'dtype' instead",
FutureWarning,
stacklevel=find_stack_level(),
)
freq = validate_dtype_freq(dtype, freq)
dtype = PeriodDtype(freq)

if dtype is not None:
dtype = pandas_dtype(dtype)
if not isinstance(dtype, PeriodDtype):
raise ValueError(f"Invalid dtype {dtype} for PeriodArray")

if isinstance(values, ABCSeries):
values = values._values
Expand All @@ -227,36 +239,38 @@ def __init__(
values = values._values

if isinstance(values, type(self)):
if freq is not None and freq != values.freq:
raise raise_on_incompatible(values, freq)
values, freq = values._ndarray, values.freq
if dtype is not None and dtype != values.dtype:
raise raise_on_incompatible(values, dtype.freq)
values, dtype = values._ndarray, values.dtype

values = np.array(values, dtype="int64", copy=copy)
if freq is None:
raise ValueError("freq is not specified and cannot be inferred")
NDArrayBacked.__init__(self, values, PeriodDtype(freq))
if dtype is None:
raise ValueError("dtype is not specified and cannot be inferred")
dtype = cast(PeriodDtype, dtype)
NDArrayBacked.__init__(self, values, dtype)

# error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
@classmethod
def _simple_new( # type: ignore[override]
cls,
values: npt.NDArray[np.int64],
freq: BaseOffset | None = None,
dtype: Dtype | None = None,
dtype: PeriodDtype,
) -> Self:
# alias for PeriodArray.__init__
assertion_msg = "Should be numpy array of type i8"
assert isinstance(values, np.ndarray) and values.dtype == "i8", assertion_msg
return cls(values, freq=freq, dtype=dtype)
return cls(values, dtype=dtype)

@classmethod
def _from_sequence(
cls,
scalars: Sequence[Period | None] | AnyArrayLike,
scalars,
*,
dtype: Dtype | None = None,
copy: bool = False,
) -> Self:
if dtype is not None:
dtype = pandas_dtype(dtype)
if dtype and isinstance(dtype, PeriodDtype):
freq = dtype.freq
else:
Expand All @@ -266,16 +280,14 @@ def _from_sequence(
validate_dtype_freq(scalars.dtype, freq)
if copy:
scalars = scalars.copy()
# error: Incompatible return value type
# (got "Union[Sequence[Optional[Period]], Union[Union[ExtensionArray,
# ndarray[Any, Any]], Index, Series]]", expected "PeriodArray")
return scalars # type: ignore[return-value]
return scalars

periods = np.asarray(scalars, dtype=object)

freq = freq or libperiod.extract_freq(periods)
ordinals = libperiod.extract_ordinals(periods, freq)
return cls(ordinals, freq=freq)
dtype = PeriodDtype(freq)
return cls(ordinals, dtype=dtype)

@classmethod
def _from_sequence_of_strings(
Expand All @@ -299,7 +311,8 @@ def _from_datetime64(cls, data, freq, tz=None) -> Self:
PeriodArray[freq]
"""
data, freq = dt64arr_to_periodarr(data, freq, tz)
return cls(data, freq=freq)
dtype = PeriodDtype(freq)
return cls(data, dtype=dtype)

@classmethod
def _generate_range(cls, start, end, periods, freq, fields):
Expand Down Expand Up @@ -610,7 +623,8 @@ def asfreq(self, freq=None, how: str = "E") -> Self:
if self._hasna:
new_data[self._isnan] = iNaT

return type(self)(new_data, freq=freq)
dtype = PeriodDtype(freq)
return type(self)(new_data, dtype=dtype)

# ------------------------------------------------------------------
# Rendering Methods
Expand Down Expand Up @@ -697,7 +711,7 @@ def _addsub_int_array_or_scalar(
if op is operator.sub:
other = -other
res_values = algos.checked_add_with_arr(self.asi8, other, arr_mask=self._isnan)
return type(self)(res_values, freq=self.freq)
return type(self)(res_values, dtype=self.dtype)

def _add_offset(self, other: BaseOffset):
assert not isinstance(other, Tick)
Expand Down Expand Up @@ -768,7 +782,7 @@ def _add_timedelta_arraylike(
self.asi8, delta.view("i8"), arr_mask=self._isnan, b_mask=b_mask
)
np.putmask(res_values, self._isnan | b_mask, iNaT)
return type(self)(res_values, freq=self.freq)
return type(self)(res_values, dtype=self.dtype)

def _check_timedeltalike_freq_compat(self, other):
"""
Expand Down Expand Up @@ -904,7 +918,12 @@ def period_array(
if is_datetime64_dtype(data_dtype):
return PeriodArray._from_datetime64(data, freq)
if isinstance(data_dtype, PeriodDtype):
return PeriodArray(data, freq=freq)
out = PeriodArray(data)
if freq is not None:
if freq == data_dtype.freq:
return out
return out.asfreq(freq)
return out

# other iterable of some kind
if not isinstance(data, (np.ndarray, list, tuple, ABCSeries)):
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1013,14 +1013,14 @@ def __from_arrow__(
results = []
for arr in chunks:
data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=np.dtype(np.int64))
parr = PeriodArray(data.copy(), freq=self.freq, copy=False)
parr = PeriodArray(data.copy(), dtype=self, copy=False)
# error: Invalid index type "ndarray[Any, dtype[bool_]]" for "PeriodArray";
# expected type "Union[int, Sequence[int], Sequence[bool], slice]"
parr[~mask] = NaT # type: ignore[index]
results.append(parr)

if not results:
return PeriodArray(np.array([], dtype="int64"), freq=self.freq, copy=False)
return PeriodArray(np.array([], dtype="int64"), dtype=self, copy=False)
return PeriodArray._concat_same_type(results)


Expand Down
14 changes: 9 additions & 5 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
)

from pandas.core.dtypes.common import is_integer
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import is_valid_na_for_dtype

Expand Down Expand Up @@ -52,7 +53,6 @@
npt,
)

from pandas.core.dtypes.dtypes import PeriodDtype

_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update({"target_klass": "PeriodIndex or list of Periods"})
Expand All @@ -68,7 +68,8 @@ def _new_PeriodIndex(cls, **d):
values = d.pop("data")
if values.dtype == "int64":
freq = d.pop("freq", None)
values = PeriodArray(values, freq=freq)
dtype = PeriodDtype(freq)
values = PeriodArray(values, dtype=dtype)
return cls._simple_new(values, **d)
else:
return cls(values, **d)
Expand Down Expand Up @@ -246,7 +247,8 @@ def __new__(
# empty when really using the range-based constructor.
freq = freq2

data = PeriodArray(data, freq=freq)
dtype = PeriodDtype(freq)
data = PeriodArray(data, dtype=dtype)
else:
freq = validate_dtype_freq(dtype, freq)

Expand All @@ -261,7 +263,8 @@ def __new__(
if data is None and ordinal is not None:
# we strangely ignore `ordinal` if data is passed.
ordinal = np.asarray(ordinal, dtype=np.int64)
data = PeriodArray(ordinal, freq=freq)
dtype = PeriodDtype(freq)
data = PeriodArray(ordinal, dtype=dtype)
else:
# don't pass copy here, since we copy later.
data = period_array(data=data, freq=freq)
Expand Down Expand Up @@ -535,5 +538,6 @@ def period_range(
freq = "D"

data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={})
data = PeriodArray(data, freq=freq)
dtype = PeriodDtype(freq)
data = PeriodArray(data, dtype=dtype)
return PeriodIndex(data, name=name)
4 changes: 3 additions & 1 deletion pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
CategoricalDtype,
DatetimeTZDtype,
ExtensionDtype,
PeriodDtype,
)
from pandas.core.dtypes.missing import array_equivalent

Expand Down Expand Up @@ -2790,7 +2791,8 @@ def f(values, freq=None, tz=None):
elif index_class == PeriodIndex:

def f(values, freq=None, tz=None):
parr = PeriodArray._simple_new(values, freq=freq)
dtype = PeriodDtype(freq)
parr = PeriodArray._simple_new(values, dtype=dtype)
return PeriodIndex._simple_new(parr, name=None)

factory = f
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/arrays/period/test_arrow_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_arrow_array(data, freq):
def test_arrow_array_missing():
from pandas.core.arrays.arrow.extension_types import ArrowPeriodType

arr = PeriodArray([1, 2, 3], freq="D")
arr = PeriodArray([1, 2, 3], dtype="period[D]")
arr[1] = pd.NaT

result = pa.array(arr)
Expand All @@ -75,7 +75,7 @@ def test_arrow_array_missing():
def test_arrow_table_roundtrip():
from pandas.core.arrays.arrow.extension_types import ArrowPeriodType

arr = PeriodArray([1, 2, 3], freq="D")
arr = PeriodArray([1, 2, 3], dtype="period[D]")
arr[1] = pd.NaT
df = pd.DataFrame({"a": arr})

Expand All @@ -96,7 +96,7 @@ def test_arrow_load_from_zero_chunks():

from pandas.core.arrays.arrow.extension_types import ArrowPeriodType

arr = PeriodArray([], freq="D")
arr = PeriodArray([], dtype="period[D]")
df = pd.DataFrame({"a": arr})

table = pa.table(df)
Expand All @@ -110,7 +110,7 @@ def test_arrow_load_from_zero_chunks():


def test_arrow_table_roundtrip_without_metadata():
arr = PeriodArray([1, 2, 3], freq="H")
arr = PeriodArray([1, 2, 3], dtype="period[H]")
arr[1] = pd.NaT
df = pd.DataFrame({"a": arr})

Expand Down
18 changes: 15 additions & 3 deletions pandas/tests/arrays/period/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,16 +75,17 @@ def test_period_array_raises(data, freq, msg):
def test_period_array_non_period_series_raies():
ser = pd.Series([1, 2, 3])
with pytest.raises(TypeError, match="dtype"):
PeriodArray(ser, freq="D")
PeriodArray(ser, dtype="period[D]")


def test_period_array_freq_mismatch():
arr = period_array(["2000", "2001"], freq="D")
with pytest.raises(IncompatibleFrequency, match="freq"):
PeriodArray(arr, freq="M")
PeriodArray(arr, dtype="period[M]")

dtype = pd.PeriodDtype(pd.tseries.offsets.MonthEnd())
with pytest.raises(IncompatibleFrequency, match="freq"):
PeriodArray(arr, freq=pd.tseries.offsets.MonthEnd())
PeriodArray(arr, dtype=dtype)


def test_from_sequence_disallows_i8():
Expand Down Expand Up @@ -121,3 +122,14 @@ def test_from_td64nat_sequence_raises():
pd.Series(arr, dtype=dtype)
with pytest.raises(ValueError, match=msg):
pd.DataFrame(arr, dtype=dtype)


def test_freq_deprecated():
# GH#52462
data = np.arange(5).astype(np.int64)
msg = "The 'freq' keyword in the PeriodArray constructor is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = PeriodArray(data, freq="M")

expected = PeriodArray(data, dtype="period[M]")
tm.assert_equal(res, expected)
Loading