diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 877d7deff6ff4..a035bab2a7049 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -7,7 +7,7 @@ import numpy as np cnp.import_array() -from cpython cimport PyFloat_Check +from cpython cimport PyFloat_Check, PyUnicode_Check from util cimport (is_integer_object, is_float_object, is_string_object, is_datetime64_object) @@ -56,6 +56,8 @@ from tslibs.timestamps cimport (create_timestamp_from_ts, _NS_UPPER_BOUND, _NS_LOWER_BOUND) from tslibs.timestamps import Timestamp +cdef bint PY2 = str == bytes + cdef inline object create_datetime_from_ts( int64_t value, pandas_datetimestruct dts, @@ -549,10 +551,10 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise elif PyDate_Check(val): + seen_datetime = 1 iresult[i] = pydate_to_dt64(val, &dts) try: check_dts_bounds(&dts) - seen_datetime = 1 except ValueError: if is_coerce: iresult[i] = NPY_NAT @@ -560,12 +562,12 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise elif is_datetime64_object(val): + seen_datetime = 1 if get_datetime64_value(val) == NPY_NAT: iresult[i] = NPY_NAT else: try: iresult[i] = get_datetime64_nanos(val) - seen_datetime = 1 except ValueError: if is_coerce: iresult[i] = NPY_NAT @@ -574,19 +576,18 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', elif is_integer_object(val) or is_float_object(val): # these must be ns unit by-definition + seen_integer = 1 if val != val or val == NPY_NAT: iresult[i] = NPY_NAT elif is_raise or is_ignore: iresult[i] = val - seen_integer = 1 else: # coerce # we now need to parse this as if unit='ns' # we can ONLY accept integers at this point # if we have previously (or in future accept # datetimes/strings, then we must coerce) - seen_integer = 1 try: iresult[i] = cast_from_unit(val, 'ns') except: @@ -594,46 +595,25 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', elif is_string_object(val): # string + seen_string = 1 if len(val) == 0 or val in nat_strings: iresult[i] = NPY_NAT continue - - seen_string = 1 + if PyUnicode_Check(val) and PY2: + val = val.encode('utf-8') try: _string_to_dts(val, &dts, &out_local, &out_tzoffset) - value = dtstruct_to_dt64(&dts) - if out_local == 1: - tz = pytz.FixedOffset(out_tzoffset) - value = tz_convert_single(value, tz, 'UTC') - iresult[i] = value - check_dts_bounds(&dts) - except OutOfBoundsDatetime: - # GH#19382 for just-barely-OutOfBounds falling back to - # dateutil parser will return incorrect result because - # it will ignore nanoseconds - if require_iso8601: - if _parse_today_now(val, &iresult[i]): - continue - elif is_coerce: - iresult[i] = NPY_NAT - continue - elif is_raise: - raise ValueError("time data {val} doesn't match " - "format specified" - .format(val=val)) - return values - elif is_coerce: - iresult[i] = NPY_NAT - continue - raise except ValueError: - # if requiring iso8601 strings, skip trying other formats - if require_iso8601: - if _parse_today_now(val, &iresult[i]): - continue - elif is_coerce: + # A ValueError at this point is a _parsing_ error + # specifically _not_ OutOfBoundsDatetime + if _parse_today_now(val, &iresult[i]): + continue + elif require_iso8601: + # if requiring iso8601 strings, skip trying + # other formats + if is_coerce: iresult[i] = NPY_NAT continue elif is_raise: @@ -646,8 +626,6 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', py_dt = parse_datetime_string(val, dayfirst=dayfirst, yearfirst=yearfirst) except Exception: - if _parse_today_now(val, &iresult[i]): - continue if is_coerce: iresult[i] = NPY_NAT continue @@ -656,16 +634,42 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', try: _ts = convert_datetime_to_tsobject(py_dt, None) iresult[i] = _ts.value - except ValueError: + except OutOfBoundsDatetime: if is_coerce: iresult[i] = NPY_NAT continue raise except: + # TODO: What exception are we concerned with here? if is_coerce: iresult[i] = NPY_NAT continue raise + else: + # No error raised by string_to_dts, pick back up + # where we left off + value = dtstruct_to_dt64(&dts) + if out_local == 1: + tz = pytz.FixedOffset(out_tzoffset) + value = tz_convert_single(value, tz, 'UTC') + iresult[i] = value + try: + check_dts_bounds(&dts) + except OutOfBoundsDatetime: + # GH#19382 for just-barely-OutOfBounds falling back to + # dateutil parser will return incorrect result because + # it will ignore nanoseconds + if is_coerce: + iresult[i] = NPY_NAT + continue + elif require_iso8601: + if is_raise: + raise ValueError("time data {val} doesn't " + "match format specified" + .format(val=val)) + return values + raise + else: if is_coerce: iresult[i] = NPY_NAT diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index f8b1f68ba33ce..b95ae07052ecb 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -18,7 +18,7 @@ from pandas.core.tools import datetimes as tools from pandas.errors import OutOfBoundsDatetime -from pandas.compat import lmap +from pandas.compat import lmap, PY3 from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.dtypes.common import is_datetime64_ns_dtype from pandas.util import testing as tm @@ -238,6 +238,13 @@ def test_to_datetime_today(self): assert pdtoday.tzinfo is None assert pdtoday2.tzinfo is None + def test_to_datetime_today_now_unicode_bytes(self): + to_datetime([u'now']) + to_datetime([u'today']) + if not PY3: + to_datetime(['now']) + to_datetime(['today']) + @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_dt64s(self, cache): in_bound_dts = [