Skip to content

REF: array_to_datetime catch overflows in one place #24049

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 2, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
267 changes: 121 additions & 146 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -526,182 +526,157 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise',
for i in range(n):
val = values[i]

if checknull_with_nat(val):
iresult[i] = NPY_NAT
try:
if checknull_with_nat(val):
iresult[i] = NPY_NAT

elif PyDateTime_Check(val):
seen_datetime = 1
if val.tzinfo is not None:
if utc_convert:
try:
elif PyDateTime_Check(val):
seen_datetime = 1
if val.tzinfo is not None:
if utc_convert:
_ts = convert_datetime_to_tsobject(val, None)
iresult[i] = _ts.value
except OutOfBoundsDatetime:
if is_coerce:
iresult[i] = NPY_NAT
continue
raise
else:
raise ValueError('Tz-aware datetime.datetime '
'cannot be converted to '
'datetime64 unless utc=True')
else:
raise ValueError('Tz-aware datetime.datetime cannot '
'be converted to datetime64 unless '
'utc=True')
else:
iresult[i] = pydatetime_to_dt64(val, &dts)
if not PyDateTime_CheckExact(val):
# i.e. a Timestamp object
iresult[i] += val.nanosecond
try:
iresult[i] = pydatetime_to_dt64(val, &dts)
if not PyDateTime_CheckExact(val):
# i.e. a Timestamp object
iresult[i] += val.nanosecond
check_dts_bounds(&dts)
except OutOfBoundsDatetime:
if is_coerce:
iresult[i] = NPY_NAT
continue
raise

elif PyDate_Check(val):
seen_datetime = 1
iresult[i] = pydate_to_dt64(val, &dts)
try:
elif PyDate_Check(val):
seen_datetime = 1
iresult[i] = pydate_to_dt64(val, &dts)
check_dts_bounds(&dts)
except OutOfBoundsDatetime:
if is_coerce:
iresult[i] = NPY_NAT
continue
raise

elif is_datetime64_object(val):
seen_datetime = 1
try:
elif is_datetime64_object(val):
seen_datetime = 1
iresult[i] = get_datetime64_nanos(val)
except OutOfBoundsDatetime:
if is_coerce:
iresult[i] = NPY_NAT
continue
raise

elif is_integer_object(val) or is_float_object(val):
# these must be ns unit by-definition
seen_integer = 1
elif is_integer_object(val) or is_float_object(val):
# these must be ns unit by-definition
seen_integer = 1

if val != val or val == NPY_NAT:
iresult[i] = NPY_NAT
elif is_raise or is_ignore:
iresult[i] = val
else:
# coerce
# we now need to parse this as if unit='ns'
# we can ONLY accept integers at this point
# if we have previously (or in future accept
# datetimes/strings, then we must coerce)
try:
iresult[i] = cast_from_unit(val, 'ns')
except:
if val != val or val == NPY_NAT:
iresult[i] = NPY_NAT
elif is_raise or is_ignore:
iresult[i] = val
else:
# coerce
# we now need to parse this as if unit='ns'
# we can ONLY accept integers at this point
# if we have previously (or in future accept
# datetimes/strings, then we must coerce)
try:
iresult[i] = cast_from_unit(val, 'ns')
except:
iresult[i] = NPY_NAT

elif is_string_object(val):
# string
seen_string = 1

if len(val) == 0 or val in nat_strings:
iresult[i] = NPY_NAT
continue
if isinstance(val, unicode) and PY2:
val = val.encode('utf-8')
elif is_string_object(val):
# string
seen_string = 1

try:
_string_to_dts(val, &dts, &out_local, &out_tzoffset)
except ValueError:
# A ValueError at this point is a _parsing_ error
# specifically _not_ OutOfBoundsDatetime
if _parse_today_now(val, &iresult[i]):
if len(val) == 0 or val in nat_strings:
iresult[i] = NPY_NAT
continue
elif require_iso8601:
# if requiring iso8601 strings, skip trying
# other formats
if is_coerce:
iresult[i] = NPY_NAT
continue
elif is_raise:
raise ValueError("time data {val} doesn't match "
"format specified"
.format(val=val))
return values, tz_out
if isinstance(val, unicode) and PY2:
val = val.encode('utf-8')

try:
py_dt = parse_datetime_string(val, dayfirst=dayfirst,
yearfirst=yearfirst)
except Exception:
if is_coerce:
iresult[i] = NPY_NAT
_string_to_dts(val, &dts, &out_local, &out_tzoffset)
except ValueError:
# A ValueError at this point is a _parsing_ error
# specifically _not_ OutOfBoundsDatetime
if _parse_today_now(val, &iresult[i]):
continue
raise TypeError("invalid string coercion to datetime")

# If the dateutil parser returned tzinfo, capture it
# to check if all arguments have the same tzinfo
tz = py_dt.utcoffset()
if tz is not None:
seen_datetime_offset = 1
# dateutil timezone objects cannot be hashed, so store
# the UTC offsets in seconds instead
out_tzoffset_vals.add(tz.total_seconds())
else:
# Add a marker for naive string, to track if we are
# parsing mixed naive and aware strings
out_tzoffset_vals.add('naive')
try:
elif require_iso8601:
# if requiring iso8601 strings, skip trying
# other formats
if is_coerce:
iresult[i] = NPY_NAT
continue
elif is_raise:
raise ValueError("time data {val} doesn't "
"match format specified"
.format(val=val))
return values, tz_out

try:
py_dt = parse_datetime_string(val,
dayfirst=dayfirst,
yearfirst=yearfirst)
except Exception:
if is_coerce:
iresult[i] = NPY_NAT
continue
raise TypeError("invalid string coercion to "
"datetime")

# If the dateutil parser returned tzinfo, capture it
# to check if all arguments have the same tzinfo
tz = py_dt.utcoffset()
if tz is not None:
seen_datetime_offset = 1
# dateutil timezone objects cannot be hashed, so
# store the UTC offsets in seconds instead
out_tzoffset_vals.add(tz.total_seconds())
else:
# Add a marker for naive string, to track if we are
# parsing mixed naive and aware strings
out_tzoffset_vals.add('naive')

_ts = convert_datetime_to_tsobject(py_dt, None)
iresult[i] = _ts.value
except OutOfBoundsDatetime:
except:
# TODO: What exception are we concerned with here?
if is_coerce:
iresult[i] = NPY_NAT
continue
raise
except:
# TODO: What exception are we concerned with here?
else:
# No error raised by string_to_dts, pick back up
# where we left off
value = dtstruct_to_dt64(&dts)
if out_local == 1:
seen_datetime_offset = 1
# Store the out_tzoffset in seconds
# since we store the total_seconds of
# dateutil.tz.tzoffset objects
out_tzoffset_vals.add(out_tzoffset * 60.)
tz = pytz.FixedOffset(out_tzoffset)
value = tz_convert_single(value, tz, UTC)
else:
# Add a marker for naive string, to track if we are
# parsing mixed naive and aware strings
out_tzoffset_vals.add('naive')
iresult[i] = value
check_dts_bounds(&dts)

else:
if is_coerce:
iresult[i] = NPY_NAT
continue
raise
else:
# No error raised by string_to_dts, pick back up
# where we left off
value = dtstruct_to_dt64(&dts)
if out_local == 1:
seen_datetime_offset = 1
# Store the out_tzoffset in seconds
# since we store the total_seconds of
# dateutil.tz.tzoffset objects
out_tzoffset_vals.add(out_tzoffset * 60.)
tz = pytz.FixedOffset(out_tzoffset)
value = tz_convert_single(value, tz, UTC)
else:
# Add a marker for naive string, to track if we are
# parsing mixed naive and aware strings
out_tzoffset_vals.add('naive')
iresult[i] = value
try:
check_dts_bounds(&dts)
except OutOfBoundsDatetime:
# GH#19382 for just-barely-OutOfBounds falling back to
# dateutil parser will return incorrect result because
# it will ignore nanoseconds
if is_coerce:
iresult[i] = NPY_NAT
continue
elif require_iso8601:
if is_raise:
raise ValueError("time data {val} doesn't "
"match format specified"
.format(val=val))
return values, tz_out
raise
raise TypeError("{typ} is not convertible to datetime"
.format(typ=type(val)))

else:
except OutOfBoundsDatetime:
if is_coerce:
iresult[i] = NPY_NAT
else:
raise TypeError("{typ} is not convertible to datetime"
.format(typ=type(val)))
continue
elif require_iso8601 and is_string_object(val):
# GH#19382 for just-barely-OutOfBounds falling back to
# dateutil parser will return incorrect result because
# it will ignore nanoseconds
if is_raise:
raise ValueError("time data {val} doesn't "
"match format specified"
.format(val=val))
assert is_ignore
return values, tz_out
raise

if seen_datetime and seen_integer:
# we have mixed datetimes & integers
Expand Down