Skip to content

Commit 79ce542

Browse files
author
MarcoGorelli
committed
refactor double-try-except and fix bug
1 parent 3a0db10 commit 79ce542

File tree

3 files changed

+137
-171
lines changed

3 files changed

+137
-171
lines changed

doc/source/whatsnew/v2.0.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -823,7 +823,8 @@ Datetimelike
823823
- Bug in :func:`to_datetime` was throwing ``ValueError`` when parsing dates with ISO8601 format where some values were not zero-padded (:issue:`21422`)
824824
- Bug in :func:`to_datetime` was giving incorrect results when using ``format='%Y%m%d'`` and ``errors='ignore'`` (:issue:`26493`)
825825
- Bug in :func:`to_datetime` was failing to parse date strings ``'today'`` and ``'now'`` if ``format`` was not ISO8601 (:issue:`50359`)
826-
- Bug in :func:`Timestamp.utctimetuple` raising a ``TypeError`` (:issue:`32174`)
826+
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing mixed-offset :class:`Timestamp` with ``errors='ignore'`` (:issue:`50585`)
827+
-
827828

828829
Timedelta
829830
^^^^^^^^^

pandas/_libs/tslib.pyx

+115-164
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ from pandas._libs.tslibs.np_datetime cimport (
3030
NPY_DATETIMEUNIT,
3131
NPY_FR_ns,
3232
check_dts_bounds,
33-
get_datetime64_value,
3433
npy_datetimestruct,
3534
npy_datetimestruct_to_datetime,
3635
pandas_datetime_to_datetimestruct,
@@ -505,144 +504,136 @@ cpdef array_to_datetime(
505504
result = np.empty(n, dtype="M8[ns]")
506505
iresult = result.view("i8")
507506

508-
try:
509-
for i in range(n):
510-
val = values[i]
507+
for i in range(n):
508+
val = values[i]
511509

512-
try:
513-
if checknull_with_nat_and_na(val):
514-
iresult[i] = NPY_NAT
510+
try:
511+
if checknull_with_nat_and_na(val):
512+
iresult[i] = NPY_NAT
515513

516-
elif PyDateTime_Check(val):
517-
if val.tzinfo is not None:
518-
found_tz = True
519-
else:
520-
found_naive = True
521-
tz_out = convert_timezone(
522-
val.tzinfo,
523-
tz_out,
524-
found_naive,
525-
found_tz,
526-
utc_convert,
527-
)
528-
result[i] = parse_pydatetime(val, &dts, utc_convert)
514+
elif PyDateTime_Check(val):
515+
if val.tzinfo is not None:
516+
found_tz = True
517+
else:
518+
found_naive = True
519+
tz_out = convert_timezone(
520+
val.tzinfo,
521+
tz_out,
522+
found_naive,
523+
found_tz,
524+
utc_convert,
525+
)
526+
result[i] = parse_pydatetime(val, &dts, utc_convert)
527+
528+
elif PyDate_Check(val):
529+
iresult[i] = pydate_to_dt64(val, &dts)
530+
check_dts_bounds(&dts)
529531

530-
elif PyDate_Check(val):
531-
iresult[i] = pydate_to_dt64(val, &dts)
532-
check_dts_bounds(&dts)
532+
elif is_datetime64_object(val):
533+
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
533534

534-
elif is_datetime64_object(val):
535-
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
535+
elif is_integer_object(val) or is_float_object(val):
536+
# these must be ns unit by definition
536537

537-
elif is_integer_object(val) or is_float_object(val):
538-
# these must be ns unit by-definition
538+
if val != val or val == NPY_NAT:
539+
iresult[i] = NPY_NAT
540+
elif is_raise or is_ignore:
541+
iresult[i] = val
542+
else:
543+
# coerce
544+
# we now need to parse this as if unit='ns'
545+
# we can ONLY accept integers at this point
546+
# if we have previously accepted (or in future accept)
547+
# datetimes/strings, then we must coerce
548+
iresult[i] = cast_from_unit(val, "ns")
539549

540-
if val != val or val == NPY_NAT:
541-
iresult[i] = NPY_NAT
542-
elif is_raise or is_ignore:
543-
iresult[i] = val
544-
else:
545-
# coerce
546-
# we now need to parse this as if unit='ns'
547-
# we can ONLY accept integers at this point
548-
# if we have previously (or in future accept
549-
# datetimes/strings, then we must coerce)
550-
try:
551-
iresult[i] = cast_from_unit(val, "ns")
552-
except OverflowError:
553-
iresult[i] = NPY_NAT
550+
elif isinstance(val, str):
551+
# string
552+
if type(val) is not str:
553+
# GH#32264 np.str_ object
554+
val = str(val)
554555

555-
elif isinstance(val, str):
556-
# string
557-
if type(val) is not str:
558-
# GH#32264 np.str_ object
559-
val = str(val)
556+
if len(val) == 0 or val in nat_strings:
557+
iresult[i] = NPY_NAT
558+
continue
560559

561-
if len(val) == 0 or val in nat_strings:
562-
iresult[i] = NPY_NAT
560+
string_to_dts_failed = string_to_dts(
561+
val, &dts, &out_bestunit, &out_local,
562+
&out_tzoffset, False, None, False
563+
)
564+
if string_to_dts_failed:
565+
# An error at this point is a _parsing_ error
566+
# specifically _not_ OutOfBoundsDatetime
567+
if parse_today_now(val, &iresult[i], utc):
563568
continue
564569

565-
string_to_dts_failed = string_to_dts(
566-
val, &dts, &out_bestunit, &out_local,
567-
&out_tzoffset, False, None, False
568-
)
569-
if string_to_dts_failed:
570-
# An error at this point is a _parsing_ error
571-
# specifically _not_ OutOfBoundsDatetime
572-
if parse_today_now(val, &iresult[i], utc):
570+
try:
571+
py_dt = parse_datetime_string(val,
572+
dayfirst=dayfirst,
573+
yearfirst=yearfirst)
574+
# If the dateutil parser returned tzinfo, capture it
575+
# to check if all arguments have the same tzinfo
576+
tz = py_dt.utcoffset()
577+
578+
except (ValueError, OverflowError):
579+
if is_coerce:
580+
iresult[i] = NPY_NAT
573581
continue
574-
575-
try:
576-
py_dt = parse_datetime_string(val,
577-
dayfirst=dayfirst,
578-
yearfirst=yearfirst)
579-
# If the dateutil parser returned tzinfo, capture it
580-
# to check if all arguments have the same tzinfo
581-
tz = py_dt.utcoffset()
582-
583-
except (ValueError, OverflowError):
584-
if is_coerce:
585-
iresult[i] = NPY_NAT
586-
continue
587-
raise TypeError(
588-
f"invalid string coercion to datetime "
589-
f"for \"{val}\", at position {i}"
590-
)
591-
592-
if tz is not None:
593-
seen_datetime_offset = True
594-
# dateutil timezone objects cannot be hashed, so
595-
# store the UTC offsets in seconds instead
596-
out_tzoffset_vals.add(tz.total_seconds())
597-
else:
598-
# Add a marker for naive string, to track if we are
599-
# parsing mixed naive and aware strings
600-
out_tzoffset_vals.add("naive")
601-
602-
_ts = convert_datetime_to_tsobject(py_dt, None)
603-
iresult[i] = _ts.value
604-
if not string_to_dts_failed:
605-
# No error reported by string_to_dts, pick back up
606-
# where we left off
607-
value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
608-
if out_local == 1:
609-
seen_datetime_offset = True
610-
# Store the out_tzoffset in seconds
611-
# since we store the total_seconds of
612-
# dateutil.tz.tzoffset objects
613-
out_tzoffset_vals.add(out_tzoffset * 60.)
614-
tz = timezone(timedelta(minutes=out_tzoffset))
615-
value = tz_localize_to_utc_single(value, tz)
616-
out_local = 0
617-
out_tzoffset = 0
618-
else:
619-
# Add a marker for naive string, to track if we are
620-
# parsing mixed naive and aware strings
621-
out_tzoffset_vals.add("naive")
622-
iresult[i] = value
623-
check_dts_bounds(&dts)
624-
625-
else:
626-
if is_coerce:
627-
iresult[i] = NPY_NAT
582+
raise TypeError(
583+
f"invalid string coercion to datetime "
584+
f"for \"{val}\", at position {i}"
585+
)
586+
587+
if tz is not None:
588+
seen_datetime_offset = True
589+
# dateutil timezone objects cannot be hashed, so
590+
# store the UTC offsets in seconds instead
591+
out_tzoffset_vals.add(tz.total_seconds())
628592
else:
629-
raise TypeError(f"{type(val)} is not convertible to datetime")
630-
631-
except OutOfBoundsDatetime as ex:
632-
ex.args = (f"{ex}, at position {i}",)
633-
if is_coerce:
634-
iresult[i] = NPY_NAT
635-
continue
636-
raise
593+
# Add a marker for naive string, to track if we are
594+
# parsing mixed naive and aware strings
595+
out_tzoffset_vals.add("naive")
596+
597+
_ts = convert_datetime_to_tsobject(py_dt, None)
598+
iresult[i] = _ts.value
599+
if not string_to_dts_failed:
600+
# No error reported by string_to_dts, pick back up
601+
# where we left off
602+
value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
603+
if out_local == 1:
604+
seen_datetime_offset = True
605+
# Store the out_tzoffset in seconds
606+
# since we store the total_seconds of
607+
# dateutil.tz.tzoffset objects
608+
out_tzoffset_vals.add(out_tzoffset * 60.)
609+
tz = timezone(timedelta(minutes=out_tzoffset))
610+
value = tz_localize_to_utc_single(value, tz)
611+
out_local = 0
612+
out_tzoffset = 0
613+
else:
614+
# Add a marker for naive string, to track if we are
615+
# parsing mixed naive and aware strings
616+
out_tzoffset_vals.add("naive")
617+
iresult[i] = value
618+
check_dts_bounds(&dts)
637619

638-
except OutOfBoundsDatetime:
639-
if is_raise:
640-
raise
620+
else:
621+
raise TypeError(f"{type(val)} is not convertible to datetime")
641622

642-
return ignore_errors_out_of_bounds_fallback(values), tz_out
623+
except (OutOfBoundsDatetime, ValueError) as ex:
624+
ex.args = (f"{ex}, at position {i}",)
625+
if is_coerce:
626+
iresult[i] = NPY_NAT
627+
continue
628+
elif is_raise:
629+
raise
630+
return values, None
643631

644-
except TypeError:
645-
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
632+
except TypeError:
633+
if is_coerce:
634+
iresult[i] = NPY_NAT
635+
continue
636+
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
646637

647638
if seen_datetime_offset and not utc_convert:
648639
# GH#17697
@@ -660,46 +651,6 @@ cpdef array_to_datetime(
660651
return result, tz_out
661652

662653

663-
@cython.wraparound(False)
664-
@cython.boundscheck(False)
665-
cdef ndarray[object] ignore_errors_out_of_bounds_fallback(ndarray[object] values):
666-
"""
667-
Fallback for array_to_datetime if an OutOfBoundsDatetime is raised
668-
and errors == "ignore"
669-
670-
Parameters
671-
----------
672-
values : ndarray[object]
673-
674-
Returns
675-
-------
676-
ndarray[object]
677-
"""
678-
cdef:
679-
Py_ssize_t i, n = len(values)
680-
object val
681-
682-
oresult = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
683-
684-
for i in range(n):
685-
val = values[i]
686-
687-
# set as nan except if its a NaT
688-
if checknull_with_nat_and_na(val):
689-
if isinstance(val, float):
690-
oresult[i] = np.nan
691-
else:
692-
oresult[i] = NaT
693-
elif is_datetime64_object(val):
694-
if get_datetime64_value(val) == NPY_NAT:
695-
oresult[i] = NaT
696-
else:
697-
oresult[i] = val.item()
698-
else:
699-
oresult[i] = val
700-
return oresult
701-
702-
703654
@cython.wraparound(False)
704655
@cython.boundscheck(False)
705656
cdef _array_to_datetime_object(

pandas/tests/tools/test_to_datetime.py

+20-6
Original file line numberDiff line numberDiff line change
@@ -1072,12 +1072,10 @@ def test_to_datetime_array_of_dt64s(self, cache, unit):
10721072
),
10731073
)
10741074

1075-
# With errors='ignore', out of bounds datetime64s
1076-
# are converted to their .item(), which depending on the version of
1077-
# numpy is either a python datetime.datetime or datetime.date
1075+
# With errors='ignore', the input is returned
10781076
tm.assert_index_equal(
10791077
to_datetime(dts_with_oob, errors="ignore", cache=cache),
1080-
Index([dt.item() for dt in dts_with_oob]),
1078+
Index(dts_with_oob),
10811079
)
10821080

10831081
def test_to_datetime_tz(self, cache):
@@ -1094,8 +1092,9 @@ def test_to_datetime_tz(self, cache):
10941092
)
10951093
tm.assert_index_equal(result, expected)
10961094

1097-
def test_to_datetime_tz_mixed_raises(self, cache):
1098-
# mixed tzs will raise
1095+
def test_to_datetime_tz_mixed(self, cache):
1096+
# mixed tzs will raise if errors='raise'
1097+
# https://github.com/pandas-dev/pandas/issues/50585
10991098
arr = [
11001099
Timestamp("2013-01-01 13:00:00", tz="US/Pacific"),
11011100
Timestamp("2013-01-02 14:00:00", tz="US/Eastern"),
@@ -1107,6 +1106,21 @@ def test_to_datetime_tz_mixed_raises(self, cache):
11071106
with pytest.raises(ValueError, match=msg):
11081107
to_datetime(arr, cache=cache)
11091108

1109+
result = to_datetime(arr, cache=cache, errors="ignore")
1110+
expected = Index(
1111+
[
1112+
Timestamp("2013-01-01 13:00:00-08:00"),
1113+
Timestamp("2013-01-02 14:00:00-05:00"),
1114+
],
1115+
dtype="object",
1116+
)
1117+
tm.assert_index_equal(result, expected)
1118+
result = to_datetime(arr, cache=cache, errors="coerce")
1119+
expected = DatetimeIndex(
1120+
["2013-01-01 13:00:00-08:00", "NaT"], dtype="datetime64[ns, US/Pacific]"
1121+
)
1122+
tm.assert_index_equal(result, expected)
1123+
11101124
def test_to_datetime_different_offsets(self, cache):
11111125
# inspired by asv timeseries.ToDatetimeNONISO8601 benchmark
11121126
# see GH-26097 for more

0 commit comments

Comments
 (0)