Skip to content

DEPR: lower/uppercase strings such as 'y', 'q', 'H', 'MIN', etc. denoting freqs/units for time series, period, and timedelta #56346

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
c7d82ec
deprecate lowercase strings denoting freq for week, month, monthend, …
natmokval Dec 5, 2023
614710b
fix tests
natmokval Dec 5, 2023
458a9c1
fix tests
natmokval Dec 6, 2023
e9c8af5
correct def _get_offset, fix tests
natmokval Dec 7, 2023
73f68f0
add tests, fix tests
natmokval Dec 7, 2023
eea9b69
fix tests
natmokval Dec 11, 2023
386eb57
correct parse_timedelta_unit, to_offset, fix tests, add tests
natmokval Dec 11, 2023
5dcb918
resolve conflicts, fix tests, add tests
natmokval Dec 12, 2023
25bfbef
fix tests
natmokval Dec 12, 2023
eb418eb
resolve conflicts, depr 'MIN' from to_timedelta, fix tests
natmokval Dec 14, 2023
943b898
Merge branch 'main' into depr-uppercasing-in-get-offset
natmokval Dec 15, 2023
435db76
deprecate 'Min' in favour of 'min'
natmokval Dec 15, 2023
3cc94d2
correct docs
natmokval Dec 15, 2023
457ae96
show depr warning in test_construction() for Period
natmokval Dec 19, 2023
3837b7f
resolve conflict
natmokval Dec 19, 2023
f709557
correct warning message in test_construction()
natmokval Dec 20, 2023
7cd909f
remove from to_offset() unnecessary check, fix test_to_offset_invalid
natmokval Dec 20, 2023
8d1d6dd
fix pre-commit error
natmokval Dec 20, 2023
6c24541
Merge branch 'main' into depr-uppercasing-in-get-offset
natmokval Dec 20, 2023
286b618
add notes to /v2.2.0.rst
natmokval Dec 21, 2023
606d0c5
add filterwarnings to test_to_offset_invalid, correct notes in v2.2.0…
natmokval Dec 21, 2023
32993a9
improve the headline in v2.2.0.rst
natmokval Dec 21, 2023
dcf1994
correct depr note in v2.2.0.rst
natmokval Dec 21, 2023
5443b1e
Merge branch 'main' into depr-uppercasing-in-get-offset
natmokval Dec 21, 2023
7889c7d
correct to_offset() for freqs such as ys, qs, and add tests
natmokval Dec 27, 2023
00e80c0
Merge branch 'main' into depr-uppercasing-in-get-offset
natmokval Dec 27, 2023
0eb098e
resolve conflicts, fix tests
natmokval Jan 4, 2024
b6074b1
resolve conflicts
natmokval Feb 8, 2024
530f056
deprecate lowercase freq 'w', 'd' from timeseries
natmokval Feb 8, 2024
f600538
fix tests for 'D'
natmokval Feb 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -407,8 +407,10 @@ cdef dict c_DEPR_ABBREVS = {
"L": "ms",
"l": "ms",
"U": "us",
"US": "us",
"u": "us",
"N": "ns",
"NS": "ns",
"n": "ns",
}

Expand Down Expand Up @@ -509,7 +511,7 @@ class Resolution(Enum):
warnings.warn(
f"\'{freq}\' is deprecated and will be removed in a future "
f"version. Please use \'{abbrev}\' "
"instead of \'{freq}\'.",
f"instead of \'{freq}\'.",
FutureWarning,
stacklevel=find_stack_level(),
)
Expand Down
94 changes: 57 additions & 37 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4650,29 +4650,7 @@ _lite_rule_alias = {
"ns": "ns",
}

_dont_uppercase = {
"h",
"bh",
"cbh",
"MS",
"ms",
"s",
"me",
"qe",
"qe-dec",
"qe-jan",
"qe-feb",
"qe-mar",
"qe-apr",
"qe-may",
"qe-jun",
"qe-jul",
"qe-aug",
"qe-sep",
"qe-oct",
"qe-nov",
"ye",
}
_dont_uppercase = {"h", "bh", "cbh", "MS", "ms", "s"}


INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}"
Expand All @@ -4691,7 +4669,27 @@ def _get_offset(name: str) -> BaseOffset:
--------
_get_offset('EOM') --> BMonthEnd(1)
"""
if name.lower() not in _dont_uppercase:
if (
name not in _lite_rule_alias
and (name.upper() in _lite_rule_alias)
and name != "ms"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Separate from this PR, but can we remove 'ms', 'us', 'ns', and 'min' from _lite_rule_alias? _lite_rule_alias is only ever used with .get(name, name), so I don't think they need to be in it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you, I did as you suggested and removed "min", "ms", "ns", and "us" from _lite_rule_alias in a separate PR, #56516. I added these aliases to the set _dont_uppercase, because otherwise we would uppercase them in _get_offset(). Could you please take a look at that PR?

I am not sure: do we need "Min" in _lite_rule_alias? We want to deprecate the alias "Min" and leave only lowercase "min" for Minutes. Am I correct?

):
warnings.warn(
f"\'{name}\' is deprecated, please use \'{name.upper()}\' instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
elif (
name not in _lite_rule_alias
and (name.lower() in _lite_rule_alias)
and name != "MS"
):
warnings.warn(
f"\'{name}\' is deprecated, please use \'{name.lower()}\' instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
if name not in _dont_uppercase:
name = name.upper()
name = _lite_rule_alias.get(name, name)
name = _lite_rule_alias.get(name.lower(), name)
Expand Down Expand Up @@ -4784,39 +4782,61 @@ cpdef to_offset(freq, bint is_period=False):

tups = zip(split[0::4], split[1::4], split[2::4])
for n, (sep, stride, name) in enumerate(tups):
if is_period is False and name in c_OFFSET_DEPR_FREQSTR:
if is_period is False and name.upper() in c_OFFSET_DEPR_FREQSTR:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also, little nitpick, but if not is_period is generally preferred over if is_period is False (and similarly for the True counterparts)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks, if not is_period looks much better indeed. I replaced if is_period is False with it (and if is_period is True with if is_period).

if n > 0 or stride == "-":
name = name
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this looks very complex, and not sure it's necessary anyway?

Copy link
Contributor Author

@natmokval natmokval Dec 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, but I think it's the only way to pass test_to_offset_invalid() in pandas/tests/tslibs/test_to_offset.py
The reason is: when we parse frequency like "2h20m" or "-m" we show our warning FutureWarning: 'm' is deprecated and will be removed in a future version, please use 'ME' instead.
But here 'm' is offsets.Minute(), not offsets.MonthEnd()

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure but '2h20m' is already invalid to begin with?

I'm not sure we should worry about a false positive warning being raised for frequencies which were invalid to begin with

Copy link
Contributor Author

@natmokval natmokval Dec 20, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, understood. I removed this unnecessary check from to_offset() and corrected the parameters in test_to_offset_invalid to avoid raising the false-positive warning.

else:
warnings.warn(
f"\'{name}\' is deprecated, please use "
f"\'{c_OFFSET_DEPR_FREQSTR.get(name.upper())}\' instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
name = c_OFFSET_DEPR_FREQSTR[name.upper()]
elif (is_period is False and
name != name.upper() and
name.upper() in c_REVERSE_OFFSET_DEPR_FREQSTR):
warnings.warn(
f"\'{name}\' is deprecated, please use "
f"\'{c_OFFSET_DEPR_FREQSTR.get(name)}\' instead.",
f"\'{name.upper()}\' instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
name = c_OFFSET_DEPR_FREQSTR[name]
if is_period is True and name in c_REVERSE_OFFSET_DEPR_FREQSTR:
if name.startswith("Y"):
name = name.upper()
if is_period is True and name.upper() in c_REVERSE_OFFSET_DEPR_FREQSTR:
if name.upper().startswith("Y"):
raise ValueError(
f"for Period, please use \'Y{name[2:]}\' "
f"for Period, please use \'Y{name.upper()[2:]}\' "
f"instead of \'{name}\'"
)
if (name.startswith("B") or
name.startswith("S") or name.startswith("C")):
if (name.upper().startswith("B") or
name.upper().startswith("S") or
name.upper().startswith("C")):
raise ValueError(INVALID_FREQ_ERR_MSG.format(name))
else:
raise ValueError(
f"for Period, please use "
f"\'{c_REVERSE_OFFSET_DEPR_FREQSTR.get(name)}\' "
f"\'{c_REVERSE_OFFSET_DEPR_FREQSTR.get(name.upper())}\' "
f"instead of \'{name}\'"
)
elif is_period is True and name in c_OFFSET_DEPR_FREQSTR:
if name.startswith("A"):
elif is_period is True and name.upper() in c_OFFSET_DEPR_FREQSTR:
if name.upper().startswith("A"):
warnings.warn(
f"\'{name}\' is deprecated and will be removed in a future "
f"version, please use \'{c_DEPR_ABBREVS.get(name)}\' "
f"version, please use "
f"\'{c_DEPR_ABBREVS.get(name.upper())}\' instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
if name.upper() != name:
warnings.warn(
f"\'{name}\' is deprecated and will be removed in "
f"a future version, please use \'{name.upper()}\' "
f"instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
name = c_OFFSET_DEPR_FREQSTR.get(name)
name = c_OFFSET_DEPR_FREQSTR.get(name.upper())

if sep != "" and not sep.isspace():
raise ValueError("separator must be spaces")
Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -719,15 +719,15 @@ cpdef inline str parse_timedelta_unit(str unit):
return "ns"
elif unit == "M":
return unit
elif unit in c_DEPR_ABBREVS:
elif unit.upper() in c_DEPR_ABBREVS and unit != c_DEPR_ABBREVS.get(unit.upper()):
warnings.warn(
f"\'{unit}\' is deprecated and will be removed in a "
f"future version. Please use \'{c_DEPR_ABBREVS.get(unit)}\' "
f"future version. Please use \'{c_DEPR_ABBREVS.get(unit.upper())}\' "
f"instead of \'{unit}\'.",
FutureWarning,
stacklevel=find_stack_level(),
)
unit = c_DEPR_ABBREVS[unit]
unit = c_DEPR_ABBREVS[unit.upper()]
try:
return timedelta_abbrevs[unit.lower()]
except KeyError:
Expand Down
36 changes: 36 additions & 0 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,42 @@ def test_date_range_frequency_M_Q_Y_A_deprecated(self, freq, freq_depr):
result = pd.date_range("1/1/2000", periods=4, freq=freq_depr)
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize(
    "freq, freq_depr",
    [
        ("2h", "2H"),
        ("2s", "2S"),
    ],
)
def test_date_range_uppercase_frequency_deprecated(self, freq, freq_depr):
    """Deprecated uppercase aliases in ``date_range`` warn and still work.

    ``freq_depr`` is the deprecated spelling and ``freq`` the supported one;
    both carry a leading stride digit that is stripped with ``[1:]`` to get
    the bare alias. The deprecated spelling must emit a ``FutureWarning``
    and produce the same index as the supported spelling.
    """
    # GH#9586, GH#54939
    # Only the leading fragment of the warning text is matched. The previous
    # version had a second f-string ("future version. Please use ...") on its
    # own line without enclosing parentheses, so it was a no-op expression
    # statement and never part of the pattern. It is dropped here, which
    # leaves the effective pattern unchanged.
    # NOTE(review): if the full message should be asserted, extend the
    # pattern inside parentheses after confirming the exact wording that
    # to_offset() emits for these aliases.
    depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed in a "

    expected = pd.date_range("1/1/2000", periods=4, freq=freq)
    with tm.assert_produces_warning(FutureWarning, match=depr_msg):
        result = pd.date_range("1/1/2000", periods=4, freq=freq_depr)
    tm.assert_index_equal(result, expected)

@pytest.mark.parametrize(
    "freq, freq_depr",
    [
        ("2ME", "2me"),
        ("2ME", "2m"),
        ("2QE-SEP", "2q-sep"),
        ("2W", "2w"),
        ("2min", "2MIN"),
    ],
)
def test_date_range_lowercase_frequency_deprecated(self, freq, freq_depr):
    """Wrong-case aliases in ``date_range`` warn and match the supported form.

    Each case pairs a supported frequency string (``freq``) with a
    deprecated spelling of it (``freq_depr``). Most cases are lowercase
    spellings; the last one, ``"2MIN"``, is an uppercase spelling of
    ``"min"``.
    """
    # GH#9586, GH#54939
    # Drop the leading stride digit to get the bare alias names that appear
    # in the warning text.
    deprecated_alias = freq_depr[1:]
    supported_alias = freq[1:]
    depr_msg = (
        f"'{deprecated_alias}' is deprecated, "
        f"please use '{supported_alias}' instead."
    )

    range_kwargs = {"start": "1/1/2000", "periods": 4}
    expected = pd.date_range(**range_kwargs, freq=freq)
    with tm.assert_produces_warning(FutureWarning, match=depr_msg):
        result = pd.date_range(**range_kwargs, freq=freq_depr)
    tm.assert_index_equal(result, expected)


def test_factorize_sort_without_freq():
dta = DatetimeArray._from_sequence([0, 2, 1], dtype="M8[ns]")
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/datetimes/test_partial_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def test_partial_slice_second_precision(self):
rng = date_range(
start=datetime(2005, 1, 1, 0, 0, 59, microsecond=999990),
periods=20,
freq="US",
freq="us",
)
s = Series(np.arange(20), rng)

Expand Down
11 changes: 7 additions & 4 deletions pandas/tests/indexes/period/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,12 @@ class TestPeriodIndexDisallowedFreqs:
("2M", "2ME"),
("2Q-MAR", "2QE-MAR"),
("2Y-FEB", "2YE-FEB"),
("2M", "2me"),
("2Q-MAR", "2qe-MAR"),
("2Y-FEB", "2yE-feb"),
],
)
def test_period_index_frequency_ME_error_message(self, freq, freq_depr):
def test_period_index_offsets_frequency_error_message(self, freq, freq_depr):
# GH#52064
msg = f"for Period, please use '{freq[1:]}' instead of '{freq_depr[1:]}'"

Expand All @@ -38,7 +41,7 @@ def test_period_index_frequency_ME_error_message(self, freq, freq_depr):
with pytest.raises(ValueError, match=msg):
period_range(start="2020-01-01", end="2020-01-02", freq=freq_depr)

@pytest.mark.parametrize("freq_depr", ["2SME", "2CBME", "2BYE"])
@pytest.mark.parametrize("freq_depr", ["2SME", "2sme", "2CBME", "2BYE", "2Bye"])
def test_period_index_frequency_invalid_freq(self, freq_depr):
# GH#9586
msg = f"Invalid frequency: {freq_depr[1:]}"
Expand Down Expand Up @@ -538,7 +541,7 @@ def test_period_range_length(self):
assert i1.freq == end_intv.freq
assert i1[-1] == end_intv

end_intv = Period("2006-12-31", "1w")
end_intv = Period("2006-12-31", "1W")
i2 = period_range(end=end_intv, periods=10)
assert len(i1) == len(i2)
assert (i1 == i2).all()
Expand Down Expand Up @@ -567,7 +570,7 @@ def test_mixed_freq_raises(self):
with tm.assert_produces_warning(FutureWarning, match=msg):
end_intv = Period("2005-05-01", "B")

vals = [end_intv, Period("2006-12-31", "w")]
vals = [end_intv, Period("2006-12-31", "W")]
msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)"
depr_msg = r"PeriodDtype\[B\] is deprecated"
with pytest.raises(IncompatibleFrequency, match=msg):
Expand Down
87 changes: 74 additions & 13 deletions pandas/tests/indexes/period/test_period_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def test_construction_from_period(self):

def test_mismatched_start_end_freq_raises(self):
depr_msg = "Period with BDay freq is deprecated"
end_w = Period("2006-12-31", "1w")
end_w = Period("2006-12-31", "1W")

with tm.assert_produces_warning(FutureWarning, match=depr_msg):
start_b = Period("02-Apr-2005", "B")
Expand All @@ -203,19 +203,80 @@ def test_constructor_U(self):
with pytest.raises(ValueError, match="Invalid frequency: X"):
period_range("2007-1-1", periods=500, freq="X")

def test_H_deprecated_from_time_series(self):
# GH#52536
msg = "'H' is deprecated and will be removed in a future version."
@pytest.mark.parametrize(
"freq,freq_depr",
[
("2Y", "2A"),
("2Y", "2a"),
("2Y-AUG", "2A-AUG"),
("2Y-AUG", "2A-aug"),
],
)
def test_a_deprecated_from_time_series(self, freq, freq_depr):
# GH#52536,GH#56346
msg = f"'{freq_depr[1:]}' is deprecated and will be removed in a "
f"future version. Please use '{freq[1:]}' instead."

with tm.assert_produces_warning(FutureWarning, match=msg):
period_range(freq="2H", start="1/1/2001", end="12/1/2009")
period_range(freq=freq_depr, start="1/1/2001", end="12/1/2009")

@pytest.mark.parametrize(
"freq, freq_depr",
[
("2M", "2m"),
("2Q-SEP", "2q-sep"),
("2Y", "2y"),
("2s", "2S"),
("2h", "2H"),
],
)
def test_lowercase_freq_deprecated_from_time_series(self, freq, freq_depr):
# GH#52536, GH#54939, GH#56346
msg = f"'{freq_depr[1:]}' is deprecated and will be removed in a "
f"future version. Please use '{freq[1:]}' instead."

with tm.assert_produces_warning(FutureWarning, match=msg):
period_range(freq=freq_depr, start="1/1/2001", end="12/1/2009")

@pytest.mark.parametrize(
"freq, freq_depr",
[
("2us", "2US"),
("2ns", "2NS"),
],
)
def test_uppercase_us_ns_deprecated_from_time_series(self, freq, freq_depr):
# GH#52536, GH#54939, GH#56346
msg = f"'{freq_depr[1:]}' is deprecated and will be removed in a "
f"future version. Please use '{freq[1:]}' instead."

with tm.assert_produces_warning(FutureWarning, match=msg):
period_range("2020-01-01 00:00:00 00:00", periods=2, freq=freq_depr)

@pytest.mark.parametrize(
"freq, freq_depr",
[
("2us", "2uS"),
("2ns", "2Ns"),
],
)
def test_mixcase_us_ns_deprecated_from_time_series(self, freq, freq_depr):
# GH#52536, GH#54939, GH#56346
msg = f"'{freq_depr[1:]}' is deprecated, please use '{freq[1:]}' instead."

with tm.assert_produces_warning(FutureWarning, match=msg):
period_range("2020-01-01 00:00:00 00:00", periods=2, freq=freq_depr)

@pytest.mark.parametrize(
"freq, freq_depr",
[
("2W", "2w"),
("2min", "2MIN"),
],
)
def test_uppercase_freq_deprecated_from_time_series(self, freq, freq_depr):
# GH#9586, GH#54939
msg = f"'{freq_depr[1:]}' is deprecated, please use '{freq[1:]}' instead."

@pytest.mark.parametrize("freq_depr", ["2A", "A-DEC", "200A-AUG"])
def test_a_deprecated_from_time_series(self, freq_depr):
# GH#52536
freq_msg = freq_depr[freq_depr.index("A") :]
msg = (
f"'{freq_msg}' is deprecated and will be removed in a future version, "
f"please use 'Y{freq_msg[1:]}' instead."
)
with tm.assert_produces_warning(FutureWarning, match=msg):
period_range(freq=freq_depr, start="1/1/2001", end="12/1/2009")
19 changes: 19 additions & 0 deletions pandas/tests/indexes/timedeltas/test_timedelta_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,25 @@ def test_timedelta_range_H_T_deprecated(self, freq, msg_freq):
expected = timedelta_range(start="0 days", end="4 days", freq=freq)
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize(
    "freq_depr, freq",
    [
        ("H", "h"),
        # NOTE(review): the ("MIN", "min") case is disabled here; confirm
        # whether it should be re-enabled or removed.
        # ("MIN", "min"),
        ("S", "s"),
        ("US", "us"),
        ("NS", "ns"),
        ("uS", "us"),
        ("Ns", "ns"),
    ],
)
def test_timedelta_range_uppercase_freq_deprecated(self, freq, freq_depr):
    """Deprecated unit spellings passed to ``to_timedelta`` warn and still work.

    Despite the test name, the function exercised is ``to_timedelta`` with
    its ``unit`` argument. The warning is matched only against the
    deprecated unit string itself.
    """
    # GH#56346
    values = np.arange(5)
    expected = to_timedelta(values, unit=freq)
    with tm.assert_produces_warning(FutureWarning, match=freq_depr):
        result = to_timedelta(values, unit=freq_depr)
    tm.assert_index_equal(result, expected)

def test_errors(self):
# not enough params
msg = (
Expand Down
Loading