Skip to content

Commit d1b792e

Browse files
committed
BUG: Pandas can't restore index from parquet with offset-specified timezone pandas-dev#35997
1 parent 2067d7e commit d1b792e

File tree

4 files changed

+65
-2
lines changed

4 files changed

+65
-2
lines changed

pandas/_libs/tslibs/timezones.pyx

+9-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
from datetime import timezone
1+
from datetime import timedelta, timezone
22

33
from cpython.datetime cimport datetime, timedelta, tzinfo
44

@@ -102,6 +102,14 @@ cpdef inline tzinfo maybe_get_tz(object tz):
102102
# On Python 3 on Windows, the filename is not always set correctly.
103103
if isinstance(tz, _dateutil_tzfile) and '.tar.gz' in tz._filename:
104104
tz._filename = zone
105+
elif tz[0] in {'-', '+'}:
106+
hours = int(tz[0:3])
107+
minutes = int(tz[0] + tz[4:6])
108+
tz = timezone(timedelta(hours=hours, minutes=minutes))
109+
elif tz[0:4] in {'UTC-', 'UTC+'}:
110+
hours = int(tz[3:6])
111+
minutes = int(tz[3] + tz[7:9])
112+
tz = timezone(timedelta(hours=hours, minutes=minutes))
105113
else:
106114
tz = pytz.timezone(tz)
107115
elif is_integer_object(tz):

pandas/conftest.py

+4
Original file line number | Diff line number | Diff line change
@@ -860,6 +860,10 @@ def iris(datapath):
860860
"Asia/Tokyo",
861861
"dateutil/US/Pacific",
862862
"dateutil/Asia/Singapore",
863+
"+01:15",
864+
"-02:15",
865+
"UTC+01:15",
866+
"UTC-02:15",
863867
tzutc(),
864868
tzlocal(),
865869
FixedOffset(300),

pandas/tests/io/test_parquet.py

+30
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,21 @@ def df_full():
124124
)
125125

126126

127+
@pytest.fixture(
    params=[
        # The current time expressed in UTC and in the two extreme fixed
        # offsets that the standard library exposes.
        *(
            datetime.datetime.now(tz)
            for tz in (
                datetime.timezone.utc,
                datetime.timezone.min,
                datetime.timezone.max,
            )
        ),
        # One fixed wall-clock time parsed with whole-hour and quarter-hour
        # offsets, both east and west of UTC.
        *(
            datetime.datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S%z")
            for stamp in (
                "2019-01-04T16:41:24+0200",
                "2019-01-04T16:41:24+0215",
                "2019-01-04T16:41:24-0200",
                "2019-01-04T16:41:24-0215",
            )
        ),
    ]
)
def timezone_aware_date_list(request):
    """
    Return the timezone-aware ``datetime`` selected for this parametrized run.

    Despite the name, each parameter is a single ``datetime`` object, not a
    list; tests that need several values repeat it themselves.
    """
    return request.param
140+
141+
127142
def check_round_trip(
128143
df,
129144
engine=None,
@@ -724,6 +739,12 @@ def test_timestamp_nanoseconds(self, pa):
724739
df = pd.DataFrame({"a": pd.date_range("2017-01-01", freq="1n", periods=10)})
725740
check_round_trip(df, pa, write_kwargs={"version": "2.0"})
726741

742+
def test_timezone_aware_index(self, pa, timezone_aware_date_list):
    """Round-trip a frame whose index and only column are tz-aware datetimes."""
    # The same aware datetime, repeated five times, is used both as the
    # index and as the data of the single column.
    stamps = [timezone_aware_date_list] * 5
    frame = pd.DataFrame({"index_as_col": stamps}, index=stamps)

    check_round_trip(frame, pa)
747+
727748
@td.skip_if_no("pyarrow", min_version="0.17")
728749
def test_filter_row_groups(self, pa):
729750
# https://github.com/pandas-dev/pandas/issues/26551
@@ -862,3 +883,12 @@ def test_empty_dataframe(self, fp):
862883
expected = df.copy()
863884
expected.index.name = "index"
864885
check_round_trip(df, fp, expected=expected)
886+
887+
def test_timezone_aware_index(self, fp, timezone_aware_date_list):
    """Round-trip a frame whose index and only column are tz-aware datetimes."""
    # The same aware datetime, repeated five times, is used both as the
    # index and as the data of the single column.
    stamps = [timezone_aware_date_list] * 5
    frame = pd.DataFrame({"index_as_col": stamps}, index=stamps)

    # The frame the round trip is compared against carries the index
    # name "index", so build it from a copy with that name set.
    round_tripped = frame.copy()
    round_tripped.index.name = "index"

    check_round_trip(frame, fp, expected=round_tripped)

pandas/tests/tslibs/test_timezones.py

+22-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
from datetime import datetime
1+
from datetime import datetime, timedelta, timezone
22

33
import dateutil.tz
44
import pytest
@@ -118,3 +118,24 @@ def test_maybe_get_tz_invalid_types():
118118
msg = "<class 'pandas._libs.tslibs.timestamps.Timestamp'>"
119119
with pytest.raises(TypeError, match=msg):
120120
timezones.maybe_get_tz(Timestamp.now("UTC"))
121+
122+
def test_maybe_get_tz_offset_only():
    """Check ``maybe_get_tz`` on ``timezone.utc`` and on offset-only strings."""
    # see gh-36004
    one_fifteen = timedelta(hours=1, minutes=15)
    two_forty_five = timedelta(hours=2, minutes=45)

    # Each entry pairs an argument with the fixed-offset timezone that
    # maybe_get_tz must return for it.
    cases = [
        # timezone.utc
        (timezone.utc, timezone(timedelta(hours=0, minutes=0))),
        # without UTC+- prefix
        ("+01:15", timezone(one_fifteen)),
        ("-01:15", timezone(-one_fifteen)),
        # with UTC+- prefix
        ("UTC+02:45", timezone(two_forty_five)),
        ("UTC-02:45", timezone(-two_forty_five)),
    ]

    for arg, expected in cases:
        tz = timezones.maybe_get_tz(arg)
        assert tz == expected

0 commit comments

Comments
 (0)