Skip to content

WIP/ENH: Pass tzinfos to dateutil parser #24104

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 27 additions & 12 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import time

from cython import Py_ssize_t

from cpython.datetime cimport datetime
from cpython.datetime cimport datetime, tzinfo


import numpy as np
Expand All @@ -35,6 +35,8 @@ from nattype import nat_strings, NaT
# ----------------------------------------------------------------------
# Constants

_default_tzinfos = {}


class DateParseError(ValueError):
pass
Expand All @@ -51,7 +53,7 @@ cdef set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}


def parse_datetime_string(date_string, freq=None, dayfirst=False,
yearfirst=False, **kwargs):
yearfirst=False, tzinfos=None, **kwargs):
"""parse datetime string, only returns datetime.
Also applies special handling to strings that match time patterns.

Expand All @@ -66,10 +68,13 @@ def parse_datetime_string(date_string, freq=None, dayfirst=False,
if not _does_string_look_like_datetime(date_string):
raise ValueError('Given date string not likely a datetime.')

if tzinfos is None:
tzinfos = _default_tzinfos

if _TIMEPAT.match(date_string):
# use current datetime as default, not pass _DEFAULT_DATETIME
dt = du_parse(date_string, dayfirst=dayfirst,
yearfirst=yearfirst, **kwargs)
yearfirst=yearfirst, tzinfos=tzinfos, **kwargs)
return dt

try:
Expand All @@ -82,7 +87,8 @@ def parse_datetime_string(date_string, freq=None, dayfirst=False,

try:
dt = du_parse(date_string, default=_DEFAULT_DATETIME,
dayfirst=dayfirst, yearfirst=yearfirst, **kwargs)
dayfirst=dayfirst, yearfirst=yearfirst,
tzinfos=tzinfos, **kwargs)
except TypeError:
# following may be raised from dateutil
# TypeError: 'NoneType' object is not iterable
Expand Down Expand Up @@ -132,7 +138,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):


cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False,
yearfirst=False):
yearfirst=False, tzinfos=None):
"""parse datetime string, only returns datetime

Returns
Expand All @@ -153,6 +159,9 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False,
if not _does_string_look_like_datetime(date_string):
raise ValueError('Given date string not likely a datetime.')

if tzinfos is None:
tzinfos = _default_tzinfos

try:
return _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq)
except DateParseError:
Expand All @@ -163,7 +172,7 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False,
try:
parsed, reso = dateutil_parse(date_string, _DEFAULT_DATETIME,
dayfirst=dayfirst, yearfirst=yearfirst,
ignoretz=False, tzinfos=None)
ignoretz=False, tzinfos=tzinfos)
except Exception as e:
# TODO: allow raise of errors within instead
raise DateParseError(e)
Expand Down Expand Up @@ -305,8 +314,12 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False,
object reso = None
dict repl = {}

if tzinfos is None:
tzinfos = _default_tzinfos

fobj = StringIO(str(timestr))
res = DEFAULTPARSER._parse(fobj, dayfirst=dayfirst, yearfirst=yearfirst)
res = DEFAULTPARSER._parse(fobj, dayfirst=dayfirst, yearfirst=yearfirst,
tzinfos=tzinfos)

# dateutil 2.2 compat
if isinstance(res, tuple): # PyTuple_Check
Expand Down Expand Up @@ -342,16 +355,18 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False,
tzdata = tzinfos(res.tzname, res.tzoffset)
else:
tzdata = tzinfos.get(res.tzname)
if isinstance(tzdata, datetime.tzinfo):
tzinfo = tzdata

if isinstance(tzdata, tzinfo):
tzobj = tzdata
elif isinstance(tzdata, (str, unicode)):
tzinfo = _dateutil_tzstr(tzdata)
tzobj = _dateutil_tzstr(tzdata)
elif isinstance(tzdata, int):
tzinfo = tzoffset(res.tzname, tzdata)
tzobj = tzoffset(res.tzname, tzdata)
else:
raise ValueError("offset must be tzinfo subclass, "
"tz string, or int offset")
ret = ret.replace(tzinfo=tzinfo)

ret = ret.replace(tzinfo=tzobj)
elif res.tzname and res.tzname in time.tzname:
ret = ret.replace(tzinfo=_dateutil_tzlocal())
elif res.tzoffset == 0:
Expand Down
30 changes: 30 additions & 0 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
module is imported, register them here rather than in the module.

"""
from datetime import tzinfo

from pandas.compat import string_types

import pandas.core.config as cf
from pandas.core.config import (
is_bool, is_callable, is_instance_factory, is_int, is_one_of_factory,
Expand Down Expand Up @@ -505,3 +509,29 @@ def register_converter_cb(key):
with cf.config_prefix("plotting.matplotlib"):
cf.register_option("register_converters", True, register_converter_doc,
validator=bool, cb=register_converter_cb)

# ------------
# Date Parsing
# ------------

with cf.config_prefix("tslib"):
from pandas._libs.tslibs.parsing import _default_tzinfos

def tz_validator(val):
    """
    Validate a value for the ``tslib.tzinfos`` option and install it as the
    parser's default ``tzinfos`` mapping.

    Parameters
    ----------
    val : dict
        Mapping of string keys to ``tzinfo`` instances.

    Raises
    ------
    ValueError
        If ``val`` is not a dict, or if any key is not a string or any
        value is not a ``tzinfo`` instance.

    Notes
    -----
    On success the contents of the module-level ``_default_tzinfos`` dict
    are replaced in place with the contents of ``val``.
    """
    msg = ("value passed to set_option('tslib.tzinfos') must be a "
           "dictionary with string keys and tzinfo values")
    if not isinstance(val, dict):
        raise ValueError(msg)

    if not all(isinstance(key, string_types) and isinstance(value, tzinfo)
               for key, value in val.items()):
        raise ValueError(msg)

    # Snapshot before mutating: ``val`` may be the very same dict object as
    # ``_default_tzinfos`` (it is what gets registered as the option's
    # default), in which case clearing first would discard every entry
    # before it could be copied back.
    validated = dict(val)

    # TODO: Should this be done elsewhere?
    _default_tzinfos.clear()
    _default_tzinfos.update(validated)

cf.register_option(
"tzinfos", _default_tzinfos,
"dictionary of tzinfo objects to pass to dateutil's parse function",
validator=tz_validator)
13 changes: 13 additions & 0 deletions pandas/tests/tslibs/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from datetime import datetime

from dateutil.parser import parse
from dateutil.tz import gettz
import numpy as np
import pytest

Expand Down Expand Up @@ -43,6 +44,18 @@ def test_parse_time_quarter_w_dash(self):


class TestDatetimeParsingWrappers(object):
def test_parse_with_tzinfos(self):
    # A timezone abbreviation listed in ``tzinfos`` should be attached to
    # the parsed result as the exact tzinfo object supplied for it.
    central = gettz("US/Central")

    parsed = parsing.parse_datetime_string(
        "2018-11-04 3:45 PM CST", tzinfos={"CST": central})

    # Note: similar parsing through to_datetime and Timestamp is checked
    # elsewhere.
    # Identity comparison works for dateutil tzinfos, but not for pytz.
    assert parsed.tzinfo is central

def test_does_not_convert_mixed_integer(self):
bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T')

Expand Down