Skip to content

Warn when creating Period with a string that includes timezone information #47990

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Bug fixes
- Bug in :meth:`DataFrame.to_sql` when ``method`` was a ``callable`` that did not return an ``int`` and would raise a ``TypeError`` (:issue:`46891`)
- Bug in :func:`read_xml` when reading XML files with Chinese character tags and would raise ``XMLSyntaxError`` (:issue:`47902`)
- Bug in :meth:`loc.__getitem__` with a list of keys causing an internal inconsistency that could lead to a disconnect between ``frame.at[x, y]`` vs ``frame[y].loc[x]`` (:issue:`22372`)
- :class:`Period` now raises a warning when created with data that contains timezone information. This is necessary because :class:`Period`, :class:`PeriodArray` and :class:`PeriodIndex` do not support timezones and hence drop any timezone information used when creating them. (:issue:`47005`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move to 1.5 this is not a change we would backport

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move to 1.5 this is not a change we would backport

I don't understand how the versioning works. Should this go into 1.5.3 or 1.6.0? I put it in v1.5.3.rst for now.


.. ---------------------------------------------------------------------------

Expand Down
7 changes: 7 additions & 0 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2614,6 +2614,13 @@ class Period(_Period):
raise ValueError(msg)

if ordinal is None:
if dt.tzinfo:
# GH 47005
warnings.warn(
"The pandas.Period class does not support timezones. "
"The timezone given in '%s' will be ignored." % value,
UserWarning
)
base = freq_to_dtype_code(freq)
ordinal = period_ordinal(dt.year, dt.month, dt.day,
dt.hour, dt.minute, dt.second,
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,11 @@ def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
-------
lower, upper: pd.Timestamp
"""
per = Period(parsed, freq=reso.attr_abbrev)
with warnings.catch_warnings():
# Period looses tzinfo. We ignore the corresponding warning here,
# and add the lost tzinfo below.
warnings.simplefilter("ignore", UserWarning)
per = Period(parsed, freq=reso.attr_abbrev)
start, end = per.start_time, per.end_time

# GH 24076
Expand Down
5 changes: 4 additions & 1 deletion pandas/plotting/_matplotlib/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Iterator,
cast,
)
import warnings

from dateutil.relativedelta import relativedelta
import matplotlib.dates as dates
Expand Down Expand Up @@ -264,7 +265,9 @@ def get_datevalue(date, freq):
if isinstance(date, Period):
return date.asfreq(freq).ordinal
elif isinstance(date, (str, datetime, pydt.date, pydt.time, np.datetime64)):
return Period(date, freq).ordinal
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
return Period(date, freq).ordinal
elif (
is_integer(date)
or is_float(date)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1070,7 +1070,7 @@ class TestPeriodArray(SharedTests):
index_cls = PeriodIndex
array_cls = PeriodArray
scalar_type = Period
example_dtype = PeriodIndex([], freq="W").dtype
example_dtype = PeriodIndex([], freq="Q").dtype

@pytest.fixture
def arr1d(self, period_index):
Expand Down
16 changes: 10 additions & 6 deletions pandas/tests/indexes/datetimes/methods/test_to_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,13 @@ def test_to_period_millisecond(self):
)

with tm.assert_produces_warning(UserWarning):
# warning that timezone info will be lost
# GH 21333 - warning that timezone info will be lost
period = index.to_period(freq="L")
assert 2 == len(period)
assert period[0] == Period("2007-01-01 10:11:12.123Z", "L")
assert period[1] == Period("2007-01-01 10:11:13.789Z", "L")
with tm.assert_produces_warning(UserWarning):
# GH 47005 - warning that timezone info will be lost
assert period[0] == Period("2007-01-01 10:11:12.123Z", "L")
assert period[1] == Period("2007-01-01 10:11:13.789Z", "L")

def test_to_period_microsecond(self):
index = DatetimeIndex(
Expand All @@ -132,11 +134,13 @@ def test_to_period_microsecond(self):
)

with tm.assert_produces_warning(UserWarning):
# warning that timezone info will be lost
# GH 21333 - warning that timezone info will be lost
period = index.to_period(freq="U")
assert 2 == len(period)
assert period[0] == Period("2007-01-01 10:11:12.123456Z", "U")
assert period[1] == Period("2007-01-01 10:11:13.789123Z", "U")
with tm.assert_produces_warning(UserWarning):
# GH 47005 - warning that timezone info will be lost
assert period[0] == Period("2007-01-01 10:11:12.123456Z", "U")
assert period[1] == Period("2007-01-01 10:11:13.789123Z", "U")

@pytest.mark.parametrize(
"tz",
Expand Down
12 changes: 11 additions & 1 deletion pandas/tests/indexes/period/test_period_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_required_arguments(self):
with pytest.raises(ValueError, match=msg):
period_range("2011-1-1", "2012-1-1", "B")

@pytest.mark.parametrize("freq", ["D", "W", "M", "Q", "A"])
@pytest.mark.parametrize("freq", ["D", "M", "Q", "A"])
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The "W" here tests for functionality that is not properly implemented in Pandas. Namely creating a Period from a string representing a week such as "2017-01-23/2017-01-29". This only works by chance and doesn't work for weeks in the 60s, 70s, 80s, 90s of any century or any year from 2360 onwards. This is because dateutil.parser.parse interprets the second year as hours and minutes.

This is a bit sad because the given string is Pandas' string representation of a week period and it would be nice to have bi-directionality.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you address this in a separate PR just to keep this PR tightly scoped?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could but doing so would stall this PR until the "W" PR is merged. The two test cases I changed raise the warning introduced in this PR. How should we proceed?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I fully understand where and how a "W" frequency in particular is impacted by this new warning.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The two tests that I propose to change create a Period, convert it to a str, and then try to create a Period again from the string while also telling the constructor what the freq is. Such a functionality isn't deliberately implemented anywhere for "W", the code path goes to dateutil.parser.parse which accepts these "W" period strings only if the year can be interpreted as a time. I've explained this in the comment above and also in #48000 - there's a section on weeks.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, this appears to be an issue, but I don't understand why these "W" changes need to made in the same PR as adding the warning for Periods with timezones since they seem independent. Having them as separate PRs makes tracking issues easier in the long run.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This only applies to "W" because it has to do with what the string representation of a week period is and how it's parsed. Some part of that week string is interpreted as a timezone. If I remember correctly, it's the hyphen and the day (-29 in the example). dateutil.parser.parse reads that as a timezone offset of negative 29 hours. Converting back to a Period will throw the warning introduced in this PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah okay thanks. That explanation makes sense.

But it would be good to still keep testing "W" even if it works by chance. (but maybe include a comment why it works by chance)

Copy link
Contributor Author

@joooeey joooeey Aug 27, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And ignore the warning in the test or what? The CI requires that the test suite doesn't throw warnings, no? It sounds like implementing that would make the test a little complex. I'd have to special-case the "W", so the warning is only ignored there and not for the other frequencies.

In general, what's the point of testing for undocumented & broken functionality like this? Do we have reason to believe that library users are relying on parsing these kind of strings? To me the conversion from Period to String and back seems like a hack to write those tests quickly. Is this something we need to support going forward?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can use pytest.param("W", marks=pytest.mark.filterwarnings(...)) in the parameterize.

Generally, we have seen all sorts of undocumented use cases and have gotten issues when those behaviors "broke". I imagine this should be supported eventually.

def test_construction_from_string(self, freq):
# non-empty
expected = date_range(
Expand Down Expand Up @@ -119,3 +119,13 @@ def test_errors(self):
msg = "periods must be a number, got foo"
with pytest.raises(TypeError, match=msg):
period_range(start="2017Q1", periods="foo")


def test_range_tz():
# GH 47005 Time zone should be ignored with warning.
with tm.assert_produces_warning(UserWarning):
pi_tz = period_range(
"2022-01-01 06:00:00+02:00", "2022-01-01 09:00:00+02:00", freq="H"
)
pi_naive = period_range("2022-01-01 06:00:00", "2022-01-01 09:00:00", freq="H")
tm.assert_index_equal(pi_tz, pi_naive)
8 changes: 8 additions & 0 deletions pandas/tests/indexing/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,11 @@ def test_getitem_str_slice_millisecond_resolution(self, frame_or_series):
],
)
tm.assert_equal(result, expected)


def test_slice_with_datestring_tz():
# GH 24076
# GH 16785
df = DataFrame([0], index=pd.DatetimeIndex(["2019-01-01"], tz="US/Pacific"))
sliced = df["2019-01-01 12:00:00+04:00":"2019-01-01 13:00:00+04:00"]
tm.assert_frame_equal(sliced, df)
39 changes: 24 additions & 15 deletions pandas/tests/resample/test_period_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,12 +263,16 @@ def test_with_local_timezone_pytz(self):

series = Series(1, index=index)
series = series.tz_convert(local_timezone)
result = series.resample("D", kind="period").mean()

# Create the expected series
# Index is moved back a day with the timezone conversion from UTC to
# Pacific
expected_index = period_range(start=start, end=end, freq="D") - offsets.Day()
# see gh-47005
with tm.assert_produces_warning(UserWarning):
result = series.resample("D", kind="period").mean()

# Create the expected series
# Index is moved back a day with the timezone conversion from UTC to
# Pacific
expected_index = (
period_range(start=start, end=end, freq="D") - offsets.Day()
)
expected = Series(1.0, index=expected_index)
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -304,14 +308,16 @@ def test_with_local_timezone_dateutil(self):

series = Series(1, index=index)
series = series.tz_convert(local_timezone)
result = series.resample("D", kind="period").mean()

# Create the expected series
# Index is moved back a day with the timezone conversion from UTC to
# Pacific
expected_index = (
period_range(start=start, end=end, freq="D", name="idx") - offsets.Day()
)
# see gh-47005
with tm.assert_produces_warning(UserWarning):
result = series.resample("D", kind="period").mean()

# Create the expected series
# Index is moved back a day with the timezone conversion from UTC to
# Pacific
expected_index = (
period_range(start=start, end=end, freq="D", name="idx") - offsets.Day()
)
expected = Series(1.0, index=expected_index)
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -504,7 +510,10 @@ def test_resample_tz_localized(self):
tm.assert_series_equal(result, expected)

# for good measure
result = s.resample("D", kind="period").mean()
# see gh-47005
with tm.assert_produces_warning(UserWarning):
result = s.resample("D", kind="period").mean()

ex_index = period_range("2001-09-20", periods=1, freq="D")
expected = Series([1.5], index=ex_index)
tm.assert_series_equal(result, expected)
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/scalar/period/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -1616,3 +1616,18 @@ def test_invalid_frequency_error_message():
msg = "Invalid frequency: <WeekOfMonth: week=0, weekday=0>"
with pytest.raises(ValueError, match=msg):
Period("2012-01-02", freq="WOM-1MON")


@pytest.mark.parametrize(
"val",
[
("20220101T123456", "Z"),
("2012-12-12T06:06:06", "-06:00"),
],
)
def test_period_with_timezone(val):
# GH 47005 Time zone should be ignored with warning.
with tm.assert_produces_warning(UserWarning):
p_tz = Period("".join(val), freq="s")
p_naive = Period(val[0], freq="s")
assert p_tz == p_naive