Skip to content

Commit 55b6914

Browse files
committed
BUG: Pandas can't restore index from parquet with offset-specified timezone #35997
1 parent c688a0f commit 55b6914

File tree

2 files changed

+61
-1
lines changed

2 files changed

+61
-1
lines changed

pandas/_libs/tslibs/timezones.pyx

+9-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from datetime import timezone
1+
from datetime import timedelta, timezone
22

33
from cpython.datetime cimport datetime, timedelta, tzinfo
44

@@ -102,6 +102,14 @@ cpdef inline tzinfo maybe_get_tz(object tz):
102102
# On Python 3 on Windows, the filename is not always set correctly.
103103
if isinstance(tz, _dateutil_tzfile) and '.tar.gz' in tz._filename:
104104
tz._filename = zone
105+
elif tz[0] in {'-', '+'}:
106+
hours = int(tz[0:3])
107+
minutes = int(tz[0] + tz[4:6])
108+
tz = timezone(timedelta(hours=hours, minutes=minutes))
109+
elif tz[0:4] in {'UTC-', 'UTC+'}:
110+
hours = int(tz[3:6])
111+
minutes = int(tz[3] + tz[7:9])
112+
tz = timezone(timedelta(hours=hours, minutes=minutes))
105113
else:
106114
tz = pytz.timezone(tz)
107115
elif is_integer_object(tz):

pandas/tests/io/test_parquet.py

+52
Original file line numberDiff line numberDiff line change
@@ -712,6 +712,31 @@ def test_timestamp_nanoseconds(self, pa):
712712
df = pd.DataFrame({"a": pd.date_range("2017-01-01", freq="1n", periods=10)})
713713
check_round_trip(df, pa, write_kwargs={"version": "2.0"})
714714

715+
@td.skip_if_no("pyarrow", min_version="0.14")
@pytest.mark.parametrize(
    "date_value",
    [
        # One UTC value and one fixed-offset (-02:15) value; each distinct
        # timezone flavour is listed exactly once so pytest does not
        # generate redundant, identical test cases.
        datetime.datetime.now(datetime.timezone.utc),
        datetime.datetime.strptime(
            "2019-01-04T16:41:24-0215", "%Y-%m-%dT%H:%M:%S%z"
        ),
    ],
)
def test_timezone_aware_index(self, pa, date_value):
    """Round-trip a frame whose index holds timezone-aware datetimes.

    The index is built from five copies of ``date_value`` and the frame has
    no columns; it is written and read back with the ``pa`` engine using
    parquet format version "2.0".
    """
    # GH#35997
    idx = 5 * [date_value]

    df = pd.DataFrame(index=idx)
    check_round_trip(df, pa, write_kwargs={"version": "2.0"})
739+
715740
@td.skip_if_no("pyarrow", min_version="0.17")
716741
def test_filter_row_groups(self, pa):
717742
# https://github.com/pandas-dev/pandas/issues/26551
@@ -850,3 +875,30 @@ def test_empty_dataframe(self, fp):
850875
expected = df.copy()
851876
expected.index.name = "index"
852877
check_round_trip(df, fp, expected=expected)
878+
879+
@pytest.mark.parametrize(
    "date_value",
    [
        # One UTC value and one fixed-offset (-02:15) value; each distinct
        # timezone flavour is listed exactly once so pytest does not
        # generate redundant, identical test cases.
        datetime.datetime.now(datetime.timezone.utc),
        datetime.datetime.strptime(
            "2019-01-04T16:41:24-0215", "%Y-%m-%dT%H:%M:%S%z"
        ),
    ],
)
def test_timezone_aware_index(self, fp, date_value):
    """Round-trip a frame whose index holds timezone-aware datetimes.

    The index is built from five copies of ``date_value`` and the frame
    carries a single all-zero column ``"col"``. The expected result is a
    copy of the frame whose index is named ``"index"``.
    """
    # GH#35997
    idx = 5 * [date_value]

    df = pd.DataFrame(index=idx, data={"col": 5 * [0]})

    # The expected frame differs from the input only in the index name.
    expected = df.copy()
    expected.index.name = "index"
    check_round_trip(df, fp, expected=expected)

0 commit comments

Comments
 (0)