Skip to content

Commit 7b45de7

Browse files
committed
POC/ENH: infer resolution in array_to_datetime (pandas-dev#55741)
* ENH: infer resolution in array_to_datetime
* post-merge fixup
* post-merge fixup
1 parent 5e74906 commit 7b45de7

File tree

2 files changed

+120
-4
lines changed

2 files changed

+120
-4
lines changed

pandas/_libs/tslib.pyx

+60-4
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,14 @@ import numpy as np
3131

3232
cnp.import_array()
3333

34+
from pandas._libs.tslibs.dtypes cimport (
35+
get_supported_reso,
36+
npy_unit_to_abbrev,
37+
)
3438
from pandas._libs.tslibs.np_datetime cimport (
3539
NPY_DATETIMEUNIT,
3640
NPY_FR_ns,
41+
get_datetime64_unit,
3742
import_pandas_datetime,
3843
npy_datetimestruct,
3944
npy_datetimestruct_to_datetime,
@@ -441,6 +446,7 @@ cpdef array_to_datetime(
441446
utc : bool, default False
442447
Indicator of whether the dates should be UTC.
443448
creso : NPY_DATETIMEUNIT, default NPY_FR_ns
449+
Set to NPY_FR_GENERIC to infer a resolution.
444450
445451
Returns
446452
-------
@@ -464,14 +470,19 @@ cpdef array_to_datetime(
464470
set out_tzoffset_vals = set()
465471
tzinfo tz_out = None
466472
cnp.flatiter it = cnp.PyArray_IterNew(values)
467-
DatetimeParseState state = DatetimeParseState()
468-
str reso_str
473+
NPY_DATETIMEUNIT item_reso
474+
bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
475+
DatetimeParseState state = DatetimeParseState(creso)
476+
str abbrev
469477

470478
# specify error conditions
471479
assert is_raise or is_ignore or is_coerce
472480

473-
reso_str = npy_unit_to_abbrev(creso)
474-
result = np.empty((<object>values).shape, dtype=f"M8[{reso_str}]")
481+
if infer_reso:
482+
abbrev = "ns"
483+
else:
484+
abbrev = npy_unit_to_abbrev(creso)
485+
result = np.empty((<object>values).shape, dtype=f"M8[{abbrev}]")
475486
iresult = result.view("i8").ravel()
476487

477488
for i in range(n):
@@ -484,19 +495,38 @@ cpdef array_to_datetime(
484495
iresult[i] = NPY_NAT
485496

486497
elif PyDateTime_Check(val):
498+
if isinstance(val, _Timestamp):
499+
item_reso = val._creso
500+
else:
501+
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
502+
state.update_creso(item_reso)
503+
if infer_reso:
504+
creso = state.creso
487505
tz_out = state.process_datetime(val, tz_out, utc_convert)
488506
iresult[i] = parse_pydatetime(val, &dts, creso=creso)
489507

490508
elif PyDate_Check(val):
509+
item_reso = NPY_DATETIMEUNIT.NPY_FR_s
510+
state.update_creso(item_reso)
511+
if infer_reso:
512+
creso = state.creso
491513
iresult[i] = pydate_to_dt64(val, &dts, reso=creso)
492514
state.found_other = True
493515

494516
elif is_datetime64_object(val):
517+
item_reso = get_supported_reso(get_datetime64_unit(val))
518+
state.update_creso(item_reso)
519+
if infer_reso:
520+
creso = state.creso
495521
iresult[i] = get_datetime64_nanos(val, creso)
496522
state.found_other = True
497523

498524
elif is_integer_object(val) or is_float_object(val):
499525
# these must be in ns units by definition
526+
item_reso = NPY_FR_ns
527+
state.update_creso(item_reso)
528+
if infer_reso:
529+
creso = state.creso
500530

501531
if val != val or val == NPY_NAT:
502532
iresult[i] = NPY_NAT
@@ -514,11 +544,20 @@ cpdef array_to_datetime(
514544
if parse_today_now(val, &iresult[i], utc, creso):
515545
# We can't _quite_ dispatch this to convert_str_to_tsobject
516546
# because there isn't a nice way to pass "utc"
547+
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
548+
state.update_creso(item_reso)
549+
if infer_reso:
550+
creso = state.creso
517551
continue
518552

519553
_ts = convert_str_to_tsobject(
520554
val, None, unit="ns", dayfirst=dayfirst, yearfirst=yearfirst
521555
)
556+
item_reso = _ts.creso
557+
state.update_creso(item_reso)
558+
if infer_reso:
559+
creso = state.creso
560+
522561
_ts.ensure_reso(creso, val)
523562

524563
iresult[i] = _ts.value
@@ -586,6 +625,23 @@ cpdef array_to_datetime(
586625
# e.g. test_to_datetime_mixed_awareness_mixed_types
587626
raise ValueError("Cannot mix tz-aware with tz-naive values")
588627

628+
if infer_reso:
629+
if state.creso_ever_changed:
630+
# We encountered mismatched resolutions, need to re-parse with
631+
# the correct one.
632+
return array_to_datetime(
633+
values,
634+
errors=errors,
635+
yearfirst=yearfirst,
636+
dayfirst=dayfirst,
637+
utc=utc,
638+
creso=state.creso,
639+
)
640+
641+
# Otherwise we can use the single reso that we encountered and avoid
642+
# a second pass.
643+
abbrev = npy_unit_to_abbrev(state.creso)
644+
result = iresult.view(f"M8[{abbrev}]")
589645
return result, tz_out
590646

591647

pandas/tests/tslibs/test_array_to_datetime.py

+60
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,66 @@
2323
creso_infer = NpyDatetimeUnit.NPY_FR_GENERIC.value
2424

2525

26+
class TestArrayToDatetimeResolutionInference:
27+
# TODO: add tests that include timezones and integers
28+
29+
def test_infer_homogeoneous_datetimes(self):
30+
dt = datetime(2023, 10, 27, 18, 3, 5, 678000)
31+
arr = np.array([dt, dt, dt], dtype=object)
32+
result, tz = tslib.array_to_datetime(arr, creso=creso_infer)
33+
assert tz is None
34+
expected = np.array([dt, dt, dt], dtype="M8[us]")
35+
tm.assert_numpy_array_equal(result, expected)
36+
37+
def test_infer_homogeoneous_date_objects(self):
38+
dt = datetime(2023, 10, 27, 18, 3, 5, 678000)
39+
dt2 = dt.date()
40+
arr = np.array([None, dt2, dt2, dt2], dtype=object)
41+
result, tz = tslib.array_to_datetime(arr, creso=creso_infer)
42+
assert tz is None
43+
expected = np.array([np.datetime64("NaT"), dt2, dt2, dt2], dtype="M8[s]")
44+
tm.assert_numpy_array_equal(result, expected)
45+
46+
def test_infer_homogeoneous_dt64(self):
47+
dt = datetime(2023, 10, 27, 18, 3, 5, 678000)
48+
dt64 = np.datetime64(dt, "ms")
49+
arr = np.array([None, dt64, dt64, dt64], dtype=object)
50+
result, tz = tslib.array_to_datetime(arr, creso=creso_infer)
51+
assert tz is None
52+
expected = np.array([np.datetime64("NaT"), dt64, dt64, dt64], dtype="M8[ms]")
53+
tm.assert_numpy_array_equal(result, expected)
54+
55+
def test_infer_homogeoneous_timestamps(self):
56+
dt = datetime(2023, 10, 27, 18, 3, 5, 678000)
57+
ts = Timestamp(dt).as_unit("ns")
58+
arr = np.array([None, ts, ts, ts], dtype=object)
59+
result, tz = tslib.array_to_datetime(arr, creso=creso_infer)
60+
assert tz is None
61+
expected = np.array([np.datetime64("NaT")] + [ts.asm8] * 3, dtype="M8[ns]")
62+
tm.assert_numpy_array_equal(result, expected)
63+
64+
def test_infer_homogeoneous_datetimes_strings(self):
65+
item = "2023-10-27 18:03:05.678000"
66+
arr = np.array([None, item, item, item], dtype=object)
67+
result, tz = tslib.array_to_datetime(arr, creso=creso_infer)
68+
assert tz is None
69+
expected = np.array([np.datetime64("NaT"), item, item, item], dtype="M8[us]")
70+
tm.assert_numpy_array_equal(result, expected)
71+
72+
def test_infer_heterogeneous(self):
73+
dtstr = "2023-10-27 18:03:05.678000"
74+
75+
arr = np.array([dtstr, dtstr[:-3], dtstr[:-7], None], dtype=object)
76+
result, tz = tslib.array_to_datetime(arr, creso=creso_infer)
77+
assert tz is None
78+
expected = np.array(arr, dtype="M8[us]")
79+
tm.assert_numpy_array_equal(result, expected)
80+
81+
result, tz = tslib.array_to_datetime(arr[::-1], creso=creso_infer)
82+
assert tz is None
83+
tm.assert_numpy_array_equal(result, expected[::-1])
84+
85+
2686
class TestArrayToDatetimeWithTZResolutionInference:
2787
def test_array_to_datetime_with_tz_resolution(self):
2888
tz = tzoffset("custom", 3600)

0 commit comments

Comments
 (0)