Skip to content

Commit 5574109

Browse files
authored
REF: handle 2D in array_to_datetime (#50549)
* REF: handle 2D in array_to_datetime
* troubleshoot build failures
* avoid build error
1 parent 6f719b2 commit 5574109

File tree

2 files changed

+46
-21
lines changed

2 files changed

+46
-21
lines changed

pandas/_libs/tslib.pyx

+45-16
Original file line number | Diff line number | Diff line change
@@ -439,7 +439,7 @@ def first_non_null(values: ndarray) -> int:
439439
@cython.wraparound(False)
440440
@cython.boundscheck(False)
441441
cpdef array_to_datetime(
442-
ndarray[object] values,
442+
ndarray values, # object dtype, arbitrary ndim
443443
str errors="raise",
444444
bint dayfirst=False,
445445
bint yearfirst=False,
@@ -478,7 +478,7 @@ cpdef array_to_datetime(
478478
tzinfo or None
479479
"""
480480
cdef:
481-
Py_ssize_t i, n = len(values)
481+
Py_ssize_t i, n = values.size
482482
object val, tz
483483
ndarray[int64_t] iresult
484484
npy_datetimestruct dts
@@ -498,15 +498,18 @@ cpdef array_to_datetime(
498498
datetime py_dt
499499
tzinfo tz_out = None
500500
bint found_tz = False, found_naive = False
501+
cnp.broadcast mi
501502

502503
# specify error conditions
503504
assert is_raise or is_ignore or is_coerce
504505

505-
result = np.empty(n, dtype="M8[ns]")
506-
iresult = result.view("i8")
506+
result = np.empty((<object>values).shape, dtype="M8[ns]")
507+
mi = cnp.PyArray_MultiIterNew2(result, values)
508+
iresult = result.view("i8").ravel()
507509

508510
for i in range(n):
509-
val = values[i]
511+
# Analogous to `val = values[i]`
512+
val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
510513

511514
try:
512515
if checknull_with_nat_and_na(val):
@@ -524,7 +527,7 @@ cpdef array_to_datetime(
524527
found_tz,
525528
utc_convert,
526529
)
527-
result[i] = parse_pydatetime(val, &dts, utc_convert)
530+
iresult[i] = parse_pydatetime(val, &dts, utc_convert)
528531

529532
elif PyDate_Check(val):
530533
iresult[i] = pydate_to_dt64(val, &dts)
@@ -559,6 +562,7 @@ cpdef array_to_datetime(
559562

560563
if len(val) == 0 or val in nat_strings:
561564
iresult[i] = NPY_NAT
565+
cnp.PyArray_MultiIter_NEXT(mi)
562566
continue
563567

564568
string_to_dts_failed = string_to_dts(
@@ -569,6 +573,7 @@ cpdef array_to_datetime(
569573
# An error at this point is a _parsing_ error
570574
# specifically _not_ OutOfBoundsDatetime
571575
if parse_today_now(val, &iresult[i], utc):
576+
cnp.PyArray_MultiIter_NEXT(mi)
572577
continue
573578

574579
py_dt = parse_datetime_string(val,
@@ -614,10 +619,13 @@ cpdef array_to_datetime(
614619
else:
615620
raise TypeError(f"{type(val)} is not convertible to datetime")
616621

622+
cnp.PyArray_MultiIter_NEXT(mi)
623+
617624
except OutOfBoundsDatetime as ex:
618625
ex.args = (f"{ex}, at position {i}",)
619626
if is_coerce:
620627
iresult[i] = NPY_NAT
628+
cnp.PyArray_MultiIter_NEXT(mi)
621629
continue
622630
elif is_raise:
623631
raise
@@ -627,6 +635,7 @@ cpdef array_to_datetime(
627635
ex.args = (f"{ex}, at position {i}",)
628636
if is_coerce:
629637
iresult[i] = NPY_NAT
638+
cnp.PyArray_MultiIter_NEXT(mi)
630639
continue
631640
elif is_raise:
632641
raise
@@ -650,7 +659,7 @@ cpdef array_to_datetime(
650659

651660
@cython.wraparound(False)
652661
@cython.boundscheck(False)
653-
cdef ndarray[object] ignore_errors_out_of_bounds_fallback(ndarray[object] values):
662+
cdef ndarray ignore_errors_out_of_bounds_fallback(ndarray values):
654663
"""
655664
Fallback for array_to_datetime if an OutOfBoundsDatetime is raised
656665
and errors == "ignore"
@@ -664,27 +673,36 @@ cdef ndarray[object] ignore_errors_out_of_bounds_fallback(ndarray[object] values
664673
ndarray[object]
665674
"""
666675
cdef:
667-
Py_ssize_t i, n = len(values)
676+
Py_ssize_t i, n = values.size
668677
object val
678+
cnp.broadcast mi
679+
ndarray[object] oresult
680+
ndarray oresult_nd
669681

670-
oresult = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
682+
oresult_nd = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
683+
mi = cnp.PyArray_MultiIterNew2(oresult_nd, values)
684+
oresult = oresult_nd.ravel()
671685

672686
for i in range(n):
673-
val = values[i]
687+
# Analogous to `val = values[i]`
688+
val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
674689

675690
# set as nan except if its a NaT
676691
if checknull_with_nat_and_na(val):
677692
if isinstance(val, float):
678693
oresult[i] = np.nan
679694
else:
680-
oresult[i] = NaT
695+
oresult[i] = <object>NaT
681696
elif is_datetime64_object(val):
682697
if get_datetime64_value(val) == NPY_NAT:
683-
oresult[i] = NaT
698+
oresult[i] = <object>NaT
684699
else:
685700
oresult[i] = val.item()
686701
else:
687702
oresult[i] = val
703+
704+
cnp.PyArray_MultiIter_NEXT(mi)
705+
688706
return oresult
689707

690708

@@ -719,24 +737,30 @@ cdef _array_to_datetime_object(
719737
Literal[None]
720738
"""
721739
cdef:
722-
Py_ssize_t i, n = len(values)
740+
Py_ssize_t i, n = values.size
723741
object val
724742
bint is_ignore = errors == "ignore"
725743
bint is_coerce = errors == "coerce"
726744
bint is_raise = errors == "raise"
745+
ndarray oresult_nd
727746
ndarray[object] oresult
728747
npy_datetimestruct dts
748+
cnp.broadcast mi
729749

730750
assert is_raise or is_ignore or is_coerce
731751

732-
oresult = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
752+
oresult_nd = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
753+
mi = cnp.PyArray_MultiIterNew2(oresult_nd, values)
754+
oresult = oresult_nd.ravel()
733755

734756
# We return an object array and only attempt to parse:
735757
# 1) NaT or NaT-like values
736758
# 2) datetime strings, which we return as datetime.datetime
737759
# 3) special strings - "now" & "today"
738760
for i in range(n):
739-
val = values[i]
761+
# Analogous to: val = values[i]
762+
val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
763+
740764
if checknull_with_nat_and_na(val) or PyDateTime_Check(val):
741765
# GH 25978. No need to parse NaT-like or datetime-like vals
742766
oresult[i] = val
@@ -747,6 +771,7 @@ cdef _array_to_datetime_object(
747771

748772
if len(val) == 0 or val in nat_strings:
749773
oresult[i] = "NaT"
774+
cnp.PyArray_MultiIter_NEXT(mi)
750775
continue
751776
try:
752777
oresult[i] = parse_datetime_string(val, dayfirst=dayfirst,
@@ -757,6 +782,7 @@ cdef _array_to_datetime_object(
757782
ex.args = (f"{ex}, at position {i}", )
758783
if is_coerce:
759784
oresult[i] = <object>NaT
785+
cnp.PyArray_MultiIter_NEXT(mi)
760786
continue
761787
if is_raise:
762788
raise
@@ -765,7 +791,10 @@ cdef _array_to_datetime_object(
765791
if is_raise:
766792
raise
767793
return values, None
768-
return oresult, None
794+
795+
cnp.PyArray_MultiIter_NEXT(mi)
796+
797+
return oresult_nd, None
769798

770799

771800
def array_to_datetime_with_tz(ndarray values, tzinfo tz):

pandas/core/arrays/datetimes.py

+1-5
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
from typing import (
1010
TYPE_CHECKING,
1111
Iterator,
12-
Literal,
1312
cast,
1413
)
1514
import warnings
@@ -2149,16 +2148,13 @@ def objects_to_datetime64ns(
21492148
# if str-dtype, convert
21502149
data = np.array(data, copy=False, dtype=np.object_)
21512150

2152-
flags = data.flags
2153-
order: Literal["F", "C"] = "F" if flags.f_contiguous else "C"
21542151
result, tz_parsed = tslib.array_to_datetime(
2155-
data.ravel("K"),
2152+
data,
21562153
errors=errors,
21572154
utc=utc,
21582155
dayfirst=dayfirst,
21592156
yearfirst=yearfirst,
21602157
)
2161-
result = result.reshape(data.shape, order=order)
21622158

21632159
if tz_parsed is not None:
21642160
# We can take a shortcut since the datetime64 numpy array

0 commit comments

Comments
 (0)