From 938dca5b0d08807ec385135115e247fbe04fa41d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 9 Feb 2018 08:19:07 -0800 Subject: [PATCH 1/5] set seen_foo at the top of the inning --- pandas/_libs/tslib.pyx | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 877d7deff6ff4..01543fb454e57 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -549,10 +549,10 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise elif PyDate_Check(val): + seen_datetime = 1 iresult[i] = pydate_to_dt64(val, &dts) try: check_dts_bounds(&dts) - seen_datetime = 1 except ValueError: if is_coerce: iresult[i] = NPY_NAT @@ -560,12 +560,12 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise elif is_datetime64_object(val): + seen_datetime = 1 if get_datetime64_value(val) == NPY_NAT: iresult[i] = NPY_NAT else: try: iresult[i] = get_datetime64_nanos(val) - seen_datetime = 1 except ValueError: if is_coerce: iresult[i] = NPY_NAT @@ -574,19 +574,18 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', elif is_integer_object(val) or is_float_object(val): # these must be ns unit by-definition + seen_integer = 1 if val != val or val == NPY_NAT: iresult[i] = NPY_NAT elif is_raise or is_ignore: iresult[i] = val - seen_integer = 1 else: # coerce # we now need to parse this as if unit='ns' # we can ONLY accept integers at this point # if we have previously (or in future accept # datetimes/strings, then we must coerce) - seen_integer = 1 try: iresult[i] = cast_from_unit(val, 'ns') except: @@ -594,13 +593,12 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', elif is_string_object(val): # string + seen_string = 1 if len(val) == 0 or val in nat_strings: iresult[i] = NPY_NAT continue - seen_string = 1 - try: _string_to_dts(val, &dts, &out_local, &out_tzoffset) value = dtstruct_to_dt64(&dts) From 69ed2375564c00d670f0097caba8e442f5c205b2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 9 Feb 2018 08:38:36 -0800 Subject: [PATCH 2/5] catch exceptions in better order --- pandas/_libs/tslib.pyx | 68 ++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 01543fb454e57..2b68841dd0149 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -601,37 +601,15 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', try: _string_to_dts(val, &dts, &out_local, &out_tzoffset) - value = dtstruct_to_dt64(&dts) - if out_local == 1: - tz = pytz.FixedOffset(out_tzoffset) - value = tz_convert_single(value, tz, 'UTC') - iresult[i] = value - check_dts_bounds(&dts) - except OutOfBoundsDatetime: - # GH#19382 for just-barely-OutOfBounds falling back to - # dateutil parser will return incorrect result because - # it will ignore nanoseconds - if require_iso8601: - if _parse_today_now(val, &iresult[i]): - continue - elif is_coerce: - iresult[i] = NPY_NAT - continue - elif is_raise: - raise ValueError("time data {val} doesn't match " - "format specified" - .format(val=val)) - return values - elif is_coerce: - iresult[i] = NPY_NAT - continue - raise except ValueError: - # if requiring iso8601 strings, skip trying other formats - if require_iso8601: - if _parse_today_now(val, &iresult[i]): - continue - elif is_coerce: + # A ValueError at this point is a _parsing_ error + # specifically _not_ OutOfBoundsDatetime + if _parse_today_now(val, &iresult[i]): + continue + elif require_iso8601: + # if requiring iso8601 strings, skip trying + # other formats + if is_coerce: iresult[i] = NPY_NAT continue elif is_raise: @@ -644,8 +622,6 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', py_dt = parse_datetime_string(val, dayfirst=dayfirst, yearfirst=yearfirst) except Exception: - if _parse_today_now(val, &iresult[i]): - continue if is_coerce: iresult[i] = NPY_NAT continue @@ -654,16 +630,42 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', try: _ts = convert_datetime_to_tsobject(py_dt, None) iresult[i] = _ts.value - except ValueError: + except OutOfBoundsDatetime: if is_coerce: iresult[i] = NPY_NAT continue raise except: + # TODO: What exception are we concerned with here? if is_coerce: iresult[i] = NPY_NAT continue raise + else: + # No error raised by string_to_dts, pick back up + # where we left off + value = dtstruct_to_dt64(&dts) + if out_local == 1: + tz = pytz.FixedOffset(out_tzoffset) + value = tz_convert_single(value, tz, 'UTC') + iresult[i] = value + try: + check_dts_bounds(&dts) + except OutOfBoundsDatetime: + # GH#19382 for just-barely-OutOfBounds falling back to + # dateutil parser will return incorrect result because + # it will ignore nanoseconds + if is_coerce: + iresult[i] = NPY_NAT + continue + elif require_iso8601: + if is_raise: + raise ValueError("time data {val} doesn't " + "match format specified" + .format(val=val)) + return values + raise + else: if is_coerce: iresult[i] = NPY_NAT From bfd19f5b9f3093eef688064b2cbff46eaac6793d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 9 Feb 2018 13:09:54 -0800 Subject: [PATCH 3/5] Fix and test unicode now/today error --- pandas/_libs/tslib.pyx | 12 ++++++++---- pandas/tests/indexes/datetimes/test_tools.py | 9 ++++++++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 2b68841dd0149..9cc535bdd7098 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -7,7 +7,7 @@ import numpy as np cnp.import_array() -from cpython cimport PyFloat_Check +from cpython cimport PyFloat_Check, PyUnicode_Check from util cimport (is_integer_object, is_float_object, is_string_object, is_datetime64_object) @@ -598,6 +598,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', if len(val) == 0 or val in nat_strings: iresult[i] = NPY_NAT continue + if PyUnicode_Check(val): + val = val.encode('utf-8') try: _string_to_dts(val, &dts, &out_local, &out_tzoffset) @@ -691,6 +693,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', return result except OutOfBoundsDatetime: + raise if is_raise: raise @@ -713,6 +716,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', oresult[i] = val return oresult except TypeError: + raise oresult = np.empty(n, dtype=object) for i in range(n): @@ -743,14 +747,14 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', return oresult -cdef inline bint _parse_today_now(str val, int64_t* iresult): +cdef inline bint _parse_today_now(bytes val, int64_t* iresult): # We delay this check for as long as possible # because it catches relatively rare cases - if val == 'now': + if val == b'now': # Note: this is *not* the same as Timestamp('now') iresult[0] = Timestamp.utcnow().value return True - elif val == 'today': + elif val == b'today': # Note: this is *not* the same as Timestamp('today') iresult[0] = Timestamp.now().normalize().value return True diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index f8b1f68ba33ce..b95ae07052ecb 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -18,7 +18,7 @@ from pandas.core.tools import datetimes as tools from pandas.errors import OutOfBoundsDatetime -from pandas.compat import lmap +from pandas.compat import lmap, PY3 from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.dtypes.common import is_datetime64_ns_dtype from pandas.util import testing as tm @@ -238,6 +238,13 @@ def test_to_datetime_today(self): assert pdtoday.tzinfo is None assert pdtoday2.tzinfo is None + def test_to_datetime_today_now_unicode_bytes(self): + to_datetime([u'now']) + to_datetime([u'today']) + if not PY3: + to_datetime(['now']) + to_datetime(['today']) + @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_dt64s(self, cache): in_bound_dts = [ From 5c38b9ae70cf68bdff2d948f52eba8973e9daa87 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 9 Feb 2018 13:10:41 -0800 Subject: [PATCH 4/5] remove debugging raises --- pandas/_libs/tslib.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 9cc535bdd7098..73bb80457bb53 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -693,7 +693,6 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', return result except OutOfBoundsDatetime: - raise if is_raise: raise @@ -716,7 +715,6 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', oresult[i] = val return oresult except TypeError: - raise oresult = np.empty(n, dtype=object) for i in range(n): From f70792b0519929a6d11062d516cf2484c4992a56 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 9 Feb 2018 18:56:25 -0800 Subject: [PATCH 5/5] only cast to bytes in py2 --- pandas/_libs/tslib.pyx | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 73bb80457bb53..a035bab2a7049 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -56,6 +56,8 @@ from tslibs.timestamps cimport (create_timestamp_from_ts, _NS_UPPER_BOUND, _NS_LOWER_BOUND) from tslibs.timestamps import Timestamp +cdef bint PY2 = str == bytes + cdef inline object create_datetime_from_ts( int64_t value, pandas_datetimestruct dts, @@ -598,7 +600,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', if len(val) == 0 or val in nat_strings: iresult[i] = NPY_NAT continue - if PyUnicode_Check(val): + if PyUnicode_Check(val) and PY2: val = val.encode('utf-8') try: @@ -745,14 +747,14 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', return oresult -cdef inline bint _parse_today_now(bytes val, int64_t* iresult): +cdef inline bint _parse_today_now(str val, int64_t* iresult): # We delay this check for as long as possible # because it catches relatively rare cases - if val == b'now': + if val == 'now': # Note: this is *not* the same as Timestamp('now') iresult[0] = Timestamp.utcnow().value return True - elif val == b'today': + elif val == 'today': # Note: this is *not* the same as Timestamp('today') iresult[0] = Timestamp.now().normalize().value return True