Skip to content

REF: handle 2D in array_to_datetime #50549

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 45 additions & 16 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ def first_non_null(values: ndarray) -> int:
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef array_to_datetime(
ndarray[object] values,
ndarray values, # object dtype, arbitrary ndim
str errors="raise",
bint dayfirst=False,
bint yearfirst=False,
Expand Down Expand Up @@ -478,7 +478,7 @@ cpdef array_to_datetime(
tzinfo or None
"""
cdef:
Py_ssize_t i, n = len(values)
Py_ssize_t i, n = values.size
object val, tz
ndarray[int64_t] iresult
npy_datetimestruct dts
Expand All @@ -498,15 +498,18 @@ cpdef array_to_datetime(
datetime py_dt
tzinfo tz_out = None
bint found_tz = False, found_naive = False
cnp.broadcast mi

# specify error conditions
assert is_raise or is_ignore or is_coerce

result = np.empty(n, dtype="M8[ns]")
iresult = result.view("i8")
result = np.empty((<object>values).shape, dtype="M8[ns]")
mi = cnp.PyArray_MultiIterNew2(result, values)
iresult = result.view("i8").ravel()

for i in range(n):
val = values[i]
# Analogous to `val = values[i]`
val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]

try:
if checknull_with_nat_and_na(val):
Expand All @@ -524,7 +527,7 @@ cpdef array_to_datetime(
found_tz,
utc_convert,
)
result[i] = parse_pydatetime(val, &dts, utc_convert)
iresult[i] = parse_pydatetime(val, &dts, utc_convert)

elif PyDate_Check(val):
iresult[i] = pydate_to_dt64(val, &dts)
Expand Down Expand Up @@ -559,6 +562,7 @@ cpdef array_to_datetime(

if len(val) == 0 or val in nat_strings:
iresult[i] = NPY_NAT
cnp.PyArray_MultiIter_NEXT(mi)
continue

string_to_dts_failed = string_to_dts(
Expand All @@ -569,6 +573,7 @@ cpdef array_to_datetime(
# An error at this point is a _parsing_ error
# specifically _not_ OutOfBoundsDatetime
if parse_today_now(val, &iresult[i], utc):
cnp.PyArray_MultiIter_NEXT(mi)
continue

py_dt = parse_datetime_string(val,
Expand Down Expand Up @@ -614,10 +619,13 @@ cpdef array_to_datetime(
else:
raise TypeError(f"{type(val)} is not convertible to datetime")

cnp.PyArray_MultiIter_NEXT(mi)

except OutOfBoundsDatetime as ex:
ex.args = (f"{ex}, at position {i}",)
if is_coerce:
iresult[i] = NPY_NAT
cnp.PyArray_MultiIter_NEXT(mi)
continue
elif is_raise:
raise
Expand All @@ -627,6 +635,7 @@ cpdef array_to_datetime(
ex.args = (f"{ex}, at position {i}",)
if is_coerce:
iresult[i] = NPY_NAT
cnp.PyArray_MultiIter_NEXT(mi)
continue
elif is_raise:
raise
Expand All @@ -650,7 +659,7 @@ cpdef array_to_datetime(

@cython.wraparound(False)
@cython.boundscheck(False)
cdef ndarray[object] ignore_errors_out_of_bounds_fallback(ndarray[object] values):
cdef ndarray ignore_errors_out_of_bounds_fallback(ndarray values):
"""
Fallback for array_to_datetime if an OutOfBoundsDatetime is raised
and errors == "ignore"
Expand All @@ -664,27 +673,36 @@ cdef ndarray[object] ignore_errors_out_of_bounds_fallback(ndarray[object] values
ndarray[object]
"""
cdef:
Py_ssize_t i, n = len(values)
Py_ssize_t i, n = values.size
object val
cnp.broadcast mi
ndarray[object] oresult
ndarray oresult_nd

oresult = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
oresult_nd = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
mi = cnp.PyArray_MultiIterNew2(oresult_nd, values)
oresult = oresult_nd.ravel()

for i in range(n):
val = values[i]
# Analogous to `val = values[i]`
val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]

# set as nan except if it's a NaT
if checknull_with_nat_and_na(val):
if isinstance(val, float):
oresult[i] = np.nan
else:
oresult[i] = NaT
oresult[i] = <object>NaT
elif is_datetime64_object(val):
if get_datetime64_value(val) == NPY_NAT:
oresult[i] = NaT
oresult[i] = <object>NaT
else:
oresult[i] = val.item()
else:
oresult[i] = val

cnp.PyArray_MultiIter_NEXT(mi)

return oresult


Expand Down Expand Up @@ -719,24 +737,30 @@ cdef _array_to_datetime_object(
Literal[None]
"""
cdef:
Py_ssize_t i, n = len(values)
Py_ssize_t i, n = values.size
object val
bint is_ignore = errors == "ignore"
bint is_coerce = errors == "coerce"
bint is_raise = errors == "raise"
ndarray oresult_nd
ndarray[object] oresult
npy_datetimestruct dts
cnp.broadcast mi

assert is_raise or is_ignore or is_coerce

oresult = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
oresult_nd = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
mi = cnp.PyArray_MultiIterNew2(oresult_nd, values)
oresult = oresult_nd.ravel()

# We return an object array and only attempt to parse:
# 1) NaT or NaT-like values
# 2) datetime strings, which we return as datetime.datetime
# 3) special strings - "now" & "today"
for i in range(n):
val = values[i]
# Analogous to: val = values[i]
val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]

if checknull_with_nat_and_na(val) or PyDateTime_Check(val):
# GH 25978. No need to parse NaT-like or datetime-like vals
oresult[i] = val
Expand All @@ -747,6 +771,7 @@ cdef _array_to_datetime_object(

if len(val) == 0 or val in nat_strings:
oresult[i] = "NaT"
cnp.PyArray_MultiIter_NEXT(mi)
continue
try:
oresult[i] = parse_datetime_string(val, dayfirst=dayfirst,
Expand All @@ -757,6 +782,7 @@ cdef _array_to_datetime_object(
ex.args = (f"{ex}, at position {i}", )
if is_coerce:
oresult[i] = <object>NaT
cnp.PyArray_MultiIter_NEXT(mi)
continue
if is_raise:
raise
Expand All @@ -765,7 +791,10 @@ cdef _array_to_datetime_object(
if is_raise:
raise
return values, None
return oresult, None

cnp.PyArray_MultiIter_NEXT(mi)

return oresult_nd, None


def array_to_datetime_with_tz(ndarray values, tzinfo tz):
Expand Down
6 changes: 1 addition & 5 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from typing import (
TYPE_CHECKING,
Iterator,
Literal,
cast,
)
import warnings
Expand Down Expand Up @@ -2149,16 +2148,13 @@ def objects_to_datetime64ns(
# if str-dtype, convert
data = np.array(data, copy=False, dtype=np.object_)

flags = data.flags
order: Literal["F", "C"] = "F" if flags.f_contiguous else "C"
result, tz_parsed = tslib.array_to_datetime(
data.ravel("K"),
data,
errors=errors,
utc=utc,
dayfirst=dayfirst,
yearfirst=yearfirst,
)
result = result.reshape(data.shape, order=order)

if tz_parsed is not None:
# We can take a shortcut since the datetime64 numpy array
Expand Down