Skip to content

Commit d7797b4

Browse files
jbrockmendel authored and jreback committed
order of exceptions in array_to_datetime (#19621)
1 parent fe972fb commit d7797b4

File tree

2 files changed

+52
-41
lines changed

2 files changed

+52
-41
lines changed

pandas/_libs/tslib.pyx

+44-40
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import numpy as np
77
cnp.import_array()
88

99

10-
from cpython cimport PyFloat_Check
10+
from cpython cimport PyFloat_Check, PyUnicode_Check
1111

1212
from util cimport (is_integer_object, is_float_object, is_string_object,
1313
is_datetime64_object)
@@ -56,6 +56,8 @@ from tslibs.timestamps cimport (create_timestamp_from_ts,
5656
_NS_UPPER_BOUND, _NS_LOWER_BOUND)
5757
from tslibs.timestamps import Timestamp
5858

59+
cdef bint PY2 = str == bytes
60+
5961

6062
cdef inline object create_datetime_from_ts(
6163
int64_t value, pandas_datetimestruct dts,
@@ -549,23 +551,23 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
549551
raise
550552

551553
elif PyDate_Check(val):
554+
seen_datetime = 1
552555
iresult[i] = pydate_to_dt64(val, &dts)
553556
try:
554557
check_dts_bounds(&dts)
555-
seen_datetime = 1
556558
except ValueError:
557559
if is_coerce:
558560
iresult[i] = NPY_NAT
559561
continue
560562
raise
561563

562564
elif is_datetime64_object(val):
565+
seen_datetime = 1
563566
if get_datetime64_value(val) == NPY_NAT:
564567
iresult[i] = NPY_NAT
565568
else:
566569
try:
567570
iresult[i] = get_datetime64_nanos(val)
568-
seen_datetime = 1
569571
except ValueError:
570572
if is_coerce:
571573
iresult[i] = NPY_NAT
@@ -574,66 +576,44 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
574576

575577
elif is_integer_object(val) or is_float_object(val):
576578
# these must be ns unit by-definition
579+
seen_integer = 1
577580

578581
if val != val or val == NPY_NAT:
579582
iresult[i] = NPY_NAT
580583
elif is_raise or is_ignore:
581584
iresult[i] = val
582-
seen_integer = 1
583585
else:
584586
# coerce
585587
# we now need to parse this as if unit='ns'
586588
# we can ONLY accept integers at this point
587589
# if we have previously (or in future accept
588590
# datetimes/strings, then we must coerce)
589-
seen_integer = 1
590591
try:
591592
iresult[i] = cast_from_unit(val, 'ns')
592593
except:
593594
iresult[i] = NPY_NAT
594595

595596
elif is_string_object(val):
596597
# string
598+
seen_string = 1
597599

598600
if len(val) == 0 or val in nat_strings:
599601
iresult[i] = NPY_NAT
600602
continue
601-
602-
seen_string = 1
603+
if PyUnicode_Check(val) and PY2:
604+
val = val.encode('utf-8')
603605

604606
try:
605607
_string_to_dts(val, &dts, &out_local, &out_tzoffset)
606-
value = dtstruct_to_dt64(&dts)
607-
if out_local == 1:
608-
tz = pytz.FixedOffset(out_tzoffset)
609-
value = tz_convert_single(value, tz, 'UTC')
610-
iresult[i] = value
611-
check_dts_bounds(&dts)
612-
except OutOfBoundsDatetime:
613-
# GH#19382 for just-barely-OutOfBounds falling back to
614-
# dateutil parser will return incorrect result because
615-
# it will ignore nanoseconds
616-
if require_iso8601:
617-
if _parse_today_now(val, &iresult[i]):
618-
continue
619-
elif is_coerce:
620-
iresult[i] = NPY_NAT
621-
continue
622-
elif is_raise:
623-
raise ValueError("time data {val} doesn't match "
624-
"format specified"
625-
.format(val=val))
626-
return values
627-
elif is_coerce:
628-
iresult[i] = NPY_NAT
629-
continue
630-
raise
631608
except ValueError:
632-
# if requiring iso8601 strings, skip trying other formats
633-
if require_iso8601:
634-
if _parse_today_now(val, &iresult[i]):
635-
continue
636-
elif is_coerce:
609+
# A ValueError at this point is a _parsing_ error
610+
# specifically _not_ OutOfBoundsDatetime
611+
if _parse_today_now(val, &iresult[i]):
612+
continue
613+
elif require_iso8601:
614+
# if requiring iso8601 strings, skip trying
615+
# other formats
616+
if is_coerce:
637617
iresult[i] = NPY_NAT
638618
continue
639619
elif is_raise:
@@ -646,8 +626,6 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
646626
py_dt = parse_datetime_string(val, dayfirst=dayfirst,
647627
yearfirst=yearfirst)
648628
except Exception:
649-
if _parse_today_now(val, &iresult[i]):
650-
continue
651629
if is_coerce:
652630
iresult[i] = NPY_NAT
653631
continue
@@ -656,16 +634,42 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
656634
try:
657635
_ts = convert_datetime_to_tsobject(py_dt, None)
658636
iresult[i] = _ts.value
659-
except ValueError:
637+
except OutOfBoundsDatetime:
660638
if is_coerce:
661639
iresult[i] = NPY_NAT
662640
continue
663641
raise
664642
except:
643+
# TODO: What exception are we concerned with here?
665644
if is_coerce:
666645
iresult[i] = NPY_NAT
667646
continue
668647
raise
648+
else:
649+
# No error raised by string_to_dts, pick back up
650+
# where we left off
651+
value = dtstruct_to_dt64(&dts)
652+
if out_local == 1:
653+
tz = pytz.FixedOffset(out_tzoffset)
654+
value = tz_convert_single(value, tz, 'UTC')
655+
iresult[i] = value
656+
try:
657+
check_dts_bounds(&dts)
658+
except OutOfBoundsDatetime:
659+
# GH#19382 for just-barely-OutOfBounds falling back to
660+
# dateutil parser will return incorrect result because
661+
# it will ignore nanoseconds
662+
if is_coerce:
663+
iresult[i] = NPY_NAT
664+
continue
665+
elif require_iso8601:
666+
if is_raise:
667+
raise ValueError("time data {val} doesn't "
668+
"match format specified"
669+
.format(val=val))
670+
return values
671+
raise
672+
669673
else:
670674
if is_coerce:
671675
iresult[i] = NPY_NAT

pandas/tests/indexes/datetimes/test_tools.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from pandas.core.tools import datetimes as tools
1919

2020
from pandas.errors import OutOfBoundsDatetime
21-
from pandas.compat import lmap
21+
from pandas.compat import lmap, PY3
2222
from pandas.compat.numpy import np_array_datetime64_compat
2323
from pandas.core.dtypes.common import is_datetime64_ns_dtype
2424
from pandas.util import testing as tm
@@ -238,6 +238,13 @@ def test_to_datetime_today(self):
238238
assert pdtoday.tzinfo is None
239239
assert pdtoday2.tzinfo is None
240240

241+
def test_to_datetime_today_now_unicode_bytes(self):
242+
to_datetime([u'now'])
243+
to_datetime([u'today'])
244+
if not PY3:
245+
to_datetime(['now'])
246+
to_datetime(['today'])
247+
241248
@pytest.mark.parametrize('cache', [True, False])
242249
def test_to_datetime_dt64s(self, cache):
243250
in_bound_dts = [

0 commit comments

Comments
 (0)