Skip to content

CLN: replace unnecessary freq_to_period_freqstr with PeriodDtype constructor #56550

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion pandas/_libs/tslibs/dtypes.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1
cdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso)
cdef bint is_supported_unit(NPY_DATETIMEUNIT reso)

cpdef freq_to_period_freqstr(freq_n, freq_name)
cdef dict c_OFFSET_TO_PERIOD_FREQSTR
cdef dict c_OFFSET_DEPR_FREQSTR
cdef dict c_REVERSE_OFFSET_DEPR_FREQSTR
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/tslibs/dtypes.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ OFFSET_TO_PERIOD_FREQSTR: dict[str, str]
def periods_per_day(reso: int = ...) -> int: ...
def periods_per_second(reso: int) -> int: ...
def abbrev_to_npy_unit(abbrev: str | None) -> int: ...
def freq_to_period_freqstr(freq_n: int, freq_name: str) -> str: ...

class PeriodDtypeBase:
_dtype_code: int # PeriodDtypeCode
Expand Down
9 changes: 0 additions & 9 deletions pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -334,15 +334,6 @@ cdef dict c_REVERSE_OFFSET_DEPR_FREQSTR = {
v: k for k, v in c_OFFSET_DEPR_FREQSTR.items()
}

cpdef freq_to_period_freqstr(freq_n, freq_name):
if freq_n == 1:
freqstr = f"""{c_OFFSET_TO_PERIOD_FREQSTR.get(
freq_name, freq_name)}"""
else:
freqstr = f"""{freq_n}{c_OFFSET_TO_PERIOD_FREQSTR.get(
freq_name, freq_name)}"""
return freqstr

# Map deprecated resolution abbreviations to correct resolution abbreviations
cdef dict c_DEPR_ABBREVS = {
"A": "Y",
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4709,7 +4709,7 @@ _lite_rule_alias = {
"ns": "ns",
}

_dont_uppercase = _dont_uppercase = {"h", "bh", "cbh", "MS", "ms", "s"}
_dont_uppercase = {"h", "bh", "cbh", "MS", "ms", "s"}


INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}"
Expand Down
23 changes: 12 additions & 11 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,7 @@ from libc.time cimport (
tm,
)

from pandas._libs.tslibs.dtypes cimport (
c_OFFSET_TO_PERIOD_FREQSTR,
freq_to_period_freqstr,
)
from pandas._libs.tslibs.dtypes cimport c_OFFSET_TO_PERIOD_FREQSTR

from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime

Expand Down Expand Up @@ -97,9 +94,6 @@ from pandas._libs.tslibs.dtypes cimport (
attrname_to_abbrevs,
freq_group_code_to_npy_unit,
)

from pandas._libs.tslibs.dtypes import freq_to_period_freqstr

from pandas._libs.tslibs.parsing cimport quarter_to_myear

from pandas._libs.tslibs.parsing import parse_datetime_string_with_reso
Expand Down Expand Up @@ -1554,7 +1548,7 @@ def extract_ordinals(ndarray values, freq) -> np.ndarray:
# if we don't raise here, we'll segfault later!
raise TypeError("extract_ordinals values must be object-dtype")

freqstr = freq_to_period_freqstr(freq.n, freq.name)
freqstr = PeriodDtypeBase(freq._period_dtype_code, freq.n)._freqstr

for i in range(n):
# Analogous to: p = values[i]
Expand Down Expand Up @@ -1722,8 +1716,15 @@ cdef class PeriodMixin:
condition = self.freq != other

if condition:
freqstr = freq_to_period_freqstr(self.freq.n, self.freq.name)
other_freqstr = freq_to_period_freqstr(other.n, other.name)
freqstr = PeriodDtypeBase(
self.freq._period_dtype_code, self.freq.n
)._freqstr
if hasattr(other, "_period_dtype_code"):
other_freqstr = PeriodDtypeBase(
other._period_dtype_code, other.n
)._freqstr
else:
other_freqstr = other.freqstr
Comment on lines -1725 to +1727
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we need the if-else branch for other but not for self.freq?

Copy link
Contributor Author

@natmokval natmokval Feb 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

self.freq always has attribute "_period_dtype_code", bur other sometimes doesn't.
For example in many tests in pandas/tests/arithmetic/test_period.py( e.g. test_pi_add_sub_timedeltalike_freq_mismatch_monthly) other is equal pd.offsets.YearBegin(2)

msg = DIFFERENT_FREQ.format(
cls=type(self).__name__,
own_freq=freqstr,
Expand Down Expand Up @@ -2479,7 +2480,7 @@ cdef class _Period(PeriodMixin):
>>> pd.Period('2020-01', 'D').freqstr
'D'
"""
freqstr = freq_to_period_freqstr(self.freq.n, self.freq.name)
freqstr = PeriodDtypeBase(self.freq._period_dtype_code, self.freq.n)._freqstr
return freqstr

def __repr__(self) -> str:
Expand Down
11 changes: 6 additions & 5 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
from pandas._libs.tslibs.dtypes import (
FreqGroup,
PeriodDtypeBase,
freq_to_period_freqstr,
)
from pandas._libs.tslibs.fields import isleapyear_arr
from pandas._libs.tslibs.offsets import (
Expand Down Expand Up @@ -323,7 +322,7 @@ def _from_datetime64(cls, data, freq, tz=None) -> Self:
PeriodArray[freq]
"""
if isinstance(freq, BaseOffset):
freq = freq_to_period_freqstr(freq.n, freq.name)
freq = PeriodDtype(freq)._freqstr
data, freq = dt64arr_to_periodarr(data, freq, tz)
dtype = PeriodDtype(freq)
return cls(data, dtype=dtype)
Expand Down Expand Up @@ -397,7 +396,7 @@ def freq(self) -> BaseOffset:

@property
def freqstr(self) -> str:
return freq_to_period_freqstr(self.freq.n, self.freq.name)
return PeriodDtype(self.freq)._freqstr

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
if dtype == "i8":
Expand Down Expand Up @@ -986,13 +985,15 @@ def raise_on_incompatible(left, right) -> IncompatibleFrequency:
if isinstance(right, (np.ndarray, ABCTimedeltaArray)) or right is None:
other_freq = None
elif isinstance(right, BaseOffset):
other_freq = freq_to_period_freqstr(right.n, right.name)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks a bit dangerous :) is this for the business day warning? if so, I'd suggest either enforcing that one first, or using warnings.filterwarnings and only filtering the warning you mean to

Copy link
Contributor Author

@natmokval natmokval Feb 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, this is for the business day warning. I agree, it's better to use warnings.filterwarnings instead of warnings.simplefilter. I made changes in order to filter: "Future warning "PeriodDtype[B] is deprecated"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can rid of this warnings.filterwarnings after enforcing depreciation of PeriodDtype[B], or we can postpone merging this PR until enforcing deprecation of PeriodDtype[B] is done.

other_freq = PeriodDtype(right)._freqstr
elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period)):
other_freq = right.freqstr
else:
other_freq = delta_to_tick(Timedelta(right)).freqstr

own_freq = freq_to_period_freqstr(left.freq.n, left.freq.name)
own_freq = PeriodDtype(left.freq)._freqstr
msg = DIFFERENT_FREQ.format(
cls=type(left).__name__, own_freq=own_freq, other_freq=other_freq
)
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
Timestamp,
to_offset,
)
from pandas._libs.tslibs.dtypes import freq_to_period_freqstr
from pandas._typing import (
AlignJoin,
AnyArrayLike,
Expand Down Expand Up @@ -123,6 +122,7 @@
from pandas.core.dtypes.dtypes import (
DatetimeTZDtype,
ExtensionDtype,
PeriodDtype,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
Expand Down Expand Up @@ -10228,9 +10228,9 @@ def _shift_with_freq(self, periods: int, axis: int, freq) -> Self:
if freq != orig_freq:
assert orig_freq is not None # for mypy
raise ValueError(
f"Given freq {freq_to_period_freqstr(freq.n, freq.name)} "
f"Given freq {PeriodDtype(freq)._freqstr} "
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this change the message? if so, could you show before vs after please?

if not, then is it possible to do this replacement across the whole codebase and get rid of freq_to_period_freqstr?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this change the message? if so, could you show before vs after please?

It doesn't change the message.

if not, then is it possible to do this replacement across the whole codebase and get rid of freq_to_period_freqstr?

I replaced freq_to_period_freqstr with PeriodDtype(offset)._freqstr where it works. Now I am trying to do this replacement in pandas/tseries/frequencies.py and in pandas/_libs/tslibs/period.pyx, but I get failures. I am not sure we can complitely rid of freq_to_period_freqstr

f"does not match PeriodIndex freq "
f"{freq_to_period_freqstr(orig_freq.n, orig_freq.name)}"
f"{PeriodDtype(orig_freq)._freqstr}"
)
new_ax: Index = index.shift(periods)
else:
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
parsing,
to_offset,
)
from pandas._libs.tslibs.dtypes import freq_to_period_freqstr
from pandas.compat.numpy import function as nv
from pandas.errors import (
InvalidIndexError,
Expand All @@ -45,7 +44,10 @@
is_list_like,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
PeriodDtype,
)

from pandas.core.arrays import (
DatetimeArray,
Expand Down Expand Up @@ -113,7 +115,7 @@ def freqstr(self) -> str:
if self._data.freqstr is not None and isinstance(
self._data, (PeriodArray, PeriodIndex)
):
freq = freq_to_period_freqstr(self._data.freq.n, self._data.freq.name)
freq = PeriodDtype(self._data.freq)._freqstr
return freq
else:
return self._data.freqstr # type: ignore[return-value]
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
Timestamp,
to_offset,
)
from pandas._libs.tslibs.dtypes import freq_to_period_freqstr
from pandas._typing import NDFrameT
from pandas.errors import AbstractMethodError
from pandas.util._decorators import (
Expand All @@ -38,7 +37,10 @@
rewrite_warning,
)

from pandas.core.dtypes.dtypes import ArrowDtype
from pandas.core.dtypes.dtypes import (
ArrowDtype,
PeriodDtype,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCSeries,
Expand Down Expand Up @@ -2633,7 +2635,7 @@ def asfreq(

if isinstance(freq, BaseOffset):
if hasattr(freq, "_period_dtype_code"):
freq = freq_to_period_freqstr(freq.n, freq.name)
freq = PeriodDtype(freq)._freqstr

new_obj = obj.copy()
new_obj.index = obj.index.asfreq(freq, how=how)
Expand Down
4 changes: 1 addition & 3 deletions pandas/io/json/_table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from pandas._libs import lib
from pandas._libs.json import ujson_loads
from pandas._libs.tslibs import timezones
from pandas._libs.tslibs.dtypes import freq_to_period_freqstr
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.base import _registry as registry
Expand Down Expand Up @@ -212,8 +211,7 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype:
elif field.get("freq"):
# GH#9586 rename frequency M to ME for offsets
offset = to_offset(field["freq"])
freq_n, freq_name = offset.n, offset.name
freq = freq_to_period_freqstr(freq_n, freq_name)
freq = PeriodDtype(offset)._freqstr
# GH#47747 using datetime over period to minimize the change surface
return f"period[{freq}]"
else:
Expand Down
8 changes: 5 additions & 3 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
OutOfBoundsDatetime,
Timestamp,
)
from pandas._libs.tslibs.dtypes import freq_to_period_freqstr
from pandas._libs.tslibs import to_offset

from pandas.core.dtypes.dtypes import PeriodDtype

import pandas as pd
from pandas import (
Expand Down Expand Up @@ -51,7 +53,7 @@ def period_index(freqstr):
warnings.filterwarnings(
"ignore", message="Period with BDay freq", category=FutureWarning
)
freqstr = freq_to_period_freqstr(1, freqstr)
freqstr = PeriodDtype(to_offset(freqstr))._freqstr
pi = pd.period_range(start=Timestamp("2000-01-01"), periods=100, freq=freqstr)
return pi

Expand Down Expand Up @@ -761,7 +763,7 @@ def test_to_period(self, datetime_index, freqstr):
dti = datetime_index
arr = dti._data

freqstr = freq_to_period_freqstr(1, freqstr)
freqstr = PeriodDtype(to_offset(freqstr))._freqstr
expected = dti.to_period(freq=freqstr)
result = arr.to_period(freq=freqstr)
assert isinstance(result, PeriodArray)
Expand Down
9 changes: 6 additions & 3 deletions pandas/tests/plotting/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
timedelta,
)
import pickle
import re

import numpy as np
import pytest
Expand All @@ -14,7 +15,8 @@
BaseOffset,
to_offset,
)
from pandas._libs.tslibs.dtypes import freq_to_period_freqstr

from pandas.core.dtypes.dtypes import PeriodDtype

from pandas import (
DataFrame,
Expand Down Expand Up @@ -238,7 +240,8 @@ def test_line_plot_period_mlt_frame(self, frqncy):
index=idx,
columns=["A", "B", "C"],
)
freq = freq_to_period_freqstr(1, df.index.freq.rule_code)
freq = re.split(r"\d", PeriodDtype(df.index.freq)._freqstr)[-1]

freq = df.index.asfreq(freq).freq
_check_plot_works(df.plot, freq)

Expand All @@ -253,7 +256,7 @@ def test_line_plot_datetime_frame(self, freq):
index=idx,
columns=["A", "B", "C"],
)
freq = freq_to_period_freqstr(1, df.index.freq.rule_code)
freq = re.split(r"\d", PeriodDtype(df.index.freq)._freqstr)[-1]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why does this one require regex splitting, but others don't?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, seems like we don't need this regex splitting here and can replace freq = re.split(r"\d", PeriodDtype(df.index.freq)._freqstr)[-1] with freq = PeriodDtype(df.index.freq)._freqstr

I will make this change.

There is another test test_line_plot_period_mlt_frame in which we use the same regex splitting. I tried to do the similar replacement (with freq = PeriodDtype(df.index.freq)._freqstr[1:]), but the test failed for "1s".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think in test_line_plot_period_mlt_frame we can change freq from

    freq = re.split(r"\d", PeriodDtype(df.index.freq)._freqstr)[-1]
    freq = df.index.asfreq(freq).freq

to

freq = df.index.freq.rule_code

and then get rid of regex splitting. I modified the test.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we use the same replacement in test_line_plot_datetime_frame, e.g. replace

freq = PeriodDtype(df.index.freq)._freqstr
freq = df.index.to_period(freq).freq

with

freq = df.index.freq.rule_code

the test passes. Should we modify this test as well?

freq = df.index.to_period(freq).freq
_check_plot_works(df.plot, freq)

Expand Down
7 changes: 2 additions & 5 deletions pandas/tseries/frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@
MONTHS,
int_to_weekday,
)
from pandas._libs.tslibs.dtypes import (
OFFSET_TO_PERIOD_FREQSTR,
freq_to_period_freqstr,
)
from pandas._libs.tslibs.dtypes import OFFSET_TO_PERIOD_FREQSTR
from pandas._libs.tslibs.fields import (
build_field_sarray,
month_position_check,
Expand Down Expand Up @@ -559,7 +556,7 @@ def _maybe_coerce_freq(code) -> str:
"""
assert code is not None
if isinstance(code, DateOffset):
code = freq_to_period_freqstr(1, code.name)
code = PeriodDtype(to_offset(code.name))._freqstr
if code in {"h", "min", "s", "ms", "us", "ns"}:
return code
else:
Expand Down