Skip to content

Commit abc575f

Browse files
committed
Extend logic of parsing timezones
1 parent 8470cc7 commit abc575f

File tree

3 files changed

+114
-17
lines changed

3 files changed

+114
-17
lines changed

pandas/_libs/tslibs/strptime.pyx

+13-12
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ except:
2020
except:
2121
from _dummy_thread import allocate_lock as _thread_allocate_lock
2222

23+
import pytz
2324

2425
from cython cimport Py_ssize_t
2526
from cpython cimport PyFloat_Check
@@ -29,8 +30,7 @@ cimport cython
2930
import numpy as np
3031
from numpy cimport ndarray, int64_t
3132

32-
from datetime import (date as datetime_date, timedelta as datetime_timedelta,
33-
timezone as datetime_timezone)
33+
from datetime import date as datetime_date, timedelta as datetime_timedelta
3434
from cpython.datetime cimport datetime
3535

3636
from np_datetime cimport (check_dts_bounds,
@@ -59,7 +59,8 @@ def array_strptime(ndarray[object] values, object fmt,
5959
Py_ssize_t i, n = len(values)
6060
pandas_datetimestruct dts
6161
ndarray[int64_t] iresult
62-
ndarray[object] results_tz
62+
ndarray[object] results_tzoffset
63+
ndarray[object] results_tzname
6364
int year, month, day, minute, hour, second, weekday, julian, tz
6465
int week_of_year, week_of_year_start
6566
int64_t us, ns
@@ -111,7 +112,8 @@ def array_strptime(ndarray[object] values, object fmt,
111112
result = np.empty(n, dtype='M8[ns]')
112113
iresult = result.view('i8')
113114

114-
results_tz = np.empty(n, dtype='object')
115+
results_tzname = np.empty(n, dtype='object')
116+
results_tzoffset = np.empty(n, dtype='object')
115117

116118
dts.us = dts.ps = dts.as = 0
117119

@@ -165,7 +167,7 @@ def array_strptime(ndarray[object] values, object fmt,
165167
if is_coerce:
166168
iresult[i] = NPY_NAT
167169
continue
168-
raise ValueError("unconverted data remains: %s" %
170+
raise ValueError("Inconsistent data remains: %s" %
169171
values[i][found.end():])
170172

171173
# search
@@ -363,17 +365,16 @@ def array_strptime(ndarray[object] values, object fmt,
363365
continue
364366
raise
365367

368+
tzname = found_dict.get('Z')
369+
if tzname is not None:
370+
results_tzname[i] = tzname
371+
366372
if gmtoff is not None:
367373
tzdelta = datetime_timedelta(seconds=gmtoff,
368374
microseconds=gmtoff_fraction)
369-
tzname = found_dict.get('Z')
370-
if tzname:
371-
tzinfo = datetime_timezone(tzdelta, tzname)
372-
else:
373-
tzinfo = datetime_timezone(tzdelta, tzname)
374-
results_tz[i] = tzinfo
375+
results_tzoffset[i] = tzdelta
375376

376-
return result, results_tz
377+
return result, results_tzname, results_tzoffset
377378

378379

379380
"""_getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored

pandas/core/tools/datetimes.py

+69-4
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,9 @@
11
from datetime import datetime, timedelta, time
2-
import numpy as np
32
from collections import MutableMapping
43

4+
import numpy as np
5+
import pytz
6+
57
from pandas._libs import tslib
68
from pandas._libs.tslibs.strptime import array_strptime
79
from pandas._libs.tslibs import parsing, conversion
@@ -27,7 +29,7 @@
2729
ABCDataFrame)
2830
from pandas.core.dtypes.missing import notna
2931
from pandas.core import algorithms
30-
32+
from pandas.compat import PY3, zip
3133

3234
def _guess_datetime_format_for_array(arr, **kwargs):
3335
# Try to guess the format based on the first non-NaN element
@@ -343,8 +345,71 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
343345
# fallback
344346
if result is None:
345347
try:
346-
result = array_strptime(arg, format, exact=exact,
347-
errors=errors)[0]
348+
parsing_tzname = '%Z' in format
349+
parsing_tzoffset = '%z' in format
350+
if tz is not None and (parsing_tzname or
351+
parsing_tzoffset):
352+
raise ValueError("Cannot pass a tz argument when "
353+
"parsing strings with timezone "
354+
"information.")
355+
result, tznames, tzoffsets = array_strptime(
356+
arg, format, exact=exact, errors=errors)
357+
if parsing_tzname and not parsing_tzoffset:
358+
if len(set(tznames)) == 1:
359+
tz = tznames[0]
360+
if box:
361+
result = DatetimeIndex(result,
362+
tz=tz,
363+
name=name)
364+
else:
365+
stamps = [tslibs.Timestamp(res, tz=tz)
366+
for res in result]
367+
result = np.array(stamps, dtype=object)
368+
else:
369+
stamps = [tslibs.Timestamp(res, tz=tz)
370+
for res, tz in zip(result, tznames)]
371+
result = np.array(stamps, dtype=object)
372+
return result
373+
elif parsing_tzoffset and not parsing_tzname:
374+
# Should we convert these to pytz.FixedOffsets
375+
# or datetime.timezones?
376+
if len(set(tzoffsets)) == 1:
377+
offset_mins = tzoffsets[0].total_seconds() / 60
378+
tzoffset = pytz.FixedOffset(offset_mins)
379+
if box:
380+
result = DatetimeIndex(result,
381+
tz=tzoffset,
382+
name=name)
383+
else:
384+
stamps = [tslibs.Timestamp(res,
385+
tzinfo=tzoffset)
386+
for res, offset in result]
387+
result = np.array(stamps, dtype=object)
388+
else:
389+
stamps = []
390+
for res, offset in zip(result, tzoffsets):
391+
offset_mins = offset.total_seconds() / 60
392+
tzoffset = pytz.FixedOffset(offset_mins)
393+
ts = tslibs.Timestamp(res, tzinfo=tzoffset)
394+
stamps.append(ts)
395+
result = np.array(stamps, dtype=object)
396+
return result
397+
elif parsing_tzoffset and parsing_tzname:
398+
if not PY3:
399+
raise ValueError("Parsing tzoffsets are not "
400+
"not supported in Python 3")
401+
from datetime import timezone
402+
stamps = []
403+
for res, offset, tzname in zip(result, tzoffsets,
404+
tznames):
405+
# Do we need to validate these timezones?
406+
# e.g. UTC / +0100
407+
tzinfo = timezone(offset, tzname)
408+
ts = tslib.Timestamp(res, tzinfo=tzinfo)
409+
stamps.append(ts)
410+
result = np.array(stamps, dtype=object)
411+
return result
412+
348413
except tslib.OutOfBoundsDatetime:
349414
if errors == 'raise':
350415
raise

pandas/tests/indexes/datetimes/test_tools.py

+32-1
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
import dateutil
99
import numpy as np
1010
from dateutil.parser import parse
11-
from datetime import datetime, date, time
11+
from datetime import datetime, timedelta, date, time
1212
from distutils.version import LooseVersion
1313

1414
import pandas as pd
@@ -184,6 +184,37 @@ def test_to_datetime_format_weeks(self, cache):
184184
for s, format, dt in data:
185185
assert to_datetime(s, format=format, cache=cache) == dt
186186

187+
@pytest.mark.skipif(not PY3, reason="datetime.timezone not supported in PY2")
def test_to_datetime_parse_timezone(self):
    """Check ``pd.to_datetime`` when ``format`` contains ``%Z`` and/or ``%z``.

    Four scenarios are exercised in sequence:

    * ``%Z`` only: the result is compared against a ``DatetimeIndex`` of
      ``tz='UTC'`` timestamps.
    * ``%z`` only: the result is compared against a ``DatetimeIndex`` of
      timestamps carrying ``pytz.FixedOffset(60)``.
    * ``%Z`` and ``%z`` together: the result is compared against an
      object-dtype ndarray of timestamps carrying a ``datetime.timezone``.
    * Passing an explicit ``tz`` together with such a format must raise
      ``ValueError``.
    """
    # Imported locally: ``datetime.timezone`` only exists on Python 3 (the
    # test is skipped otherwise), and ``pytz`` is needed to build the
    # expected fixed-offset timezone below.
    from datetime import timezone
    import pytz

    # %Z parsing only
    fmt = '%Y-%m-%d %H:%M:%S %Z'
    dates = ['2010-01-01 12:00:00 UTC'] * 2
    result = pd.to_datetime(dates, format=fmt)
    expected_dates = [pd.Timestamp('2010-01-01 12:00:00', tz='UTC')] * 2
    expected = pd.DatetimeIndex(expected_dates)
    tm.assert_index_equal(result, expected)

    # %z parsing only
    dates = ['2010-01-01 12:00:00 +0100'] * 2
    fmt = '%Y-%m-%d %H:%M:%S %z'
    result = pd.to_datetime(dates, format=fmt)
    expected_dates = [pd.Timestamp('2010-01-01 12:00:00',
                                   tzinfo=pytz.FixedOffset(60))] * 2
    expected = pd.DatetimeIndex(expected_dates)
    tm.assert_index_equal(result, expected)

    # %z and %Z parsing
    dates = ['2010-01-01 12:00:00 UTC +0100'] * 2
    fmt = '%Y-%m-%d %H:%M:%S %Z %z'
    result = pd.to_datetime(dates, format=fmt)
    tzinfo = timezone(timedelta(minutes=60), 'UTC')
    expected_dates = [pd.Timestamp('2010-01-01 12:00:00', tzinfo=tzinfo)]
    expected = np.array(expected_dates * 2, dtype=object)
    tm.assert_numpy_array_equal(result, expected)

    # An explicit tz conflicts with timezone directives in the format.
    with pytest.raises(ValueError):
        pd.to_datetime(dates, format=fmt, tz='US/Pacific')
187218

188219
class TestToDatetime(object):
189220
def test_to_datetime_pydatetime(self):

0 commit comments

Comments
 (0)