Skip to content

Commit 3fd3756

Browse files
authored
ENH/BUG: infer reso in array_strptime (#55805)
* ENH/BUG: infer reso in array_strptime
* increase tolerance 1000x
1 parent dd7441b commit 3fd3756

File tree

3 files changed: +67 -13 lines changed

pandas/_libs/tslibs/strptime.pxd

+3 -1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,9 @@ from numpy cimport int64_t
77
from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT
88

99

10-
cdef bint parse_today_now(str val, int64_t* iresult, bint utc, NPY_DATETIMEUNIT creso)
10+
cdef bint parse_today_now(
11+
str val, int64_t* iresult, bint utc, NPY_DATETIMEUNIT creso, bint infer_reso=*
12+
)
1113

1214

1315
cdef class DatetimeParseState:

pandas/_libs/tslibs/strptime.pyx

+28 -12
Original file line number | Diff line number | Diff line change
@@ -117,14 +117,16 @@ def _test_format_is_iso(f: str) -> bool:
117117

118118

119119
cdef bint parse_today_now(
120-
str val, int64_t* iresult, bint utc, NPY_DATETIMEUNIT creso
120+
str val, int64_t* iresult, bint utc, NPY_DATETIMEUNIT creso, bint infer_reso = False
121121
):
122122
# We delay this check for as long as possible
123123
# because it catches relatively rare cases
124124
cdef:
125125
_Timestamp ts
126126

127127
if val == "now":
128+
if infer_reso:
129+
creso = NPY_DATETIMEUNIT.NPY_FR_us
128130
if utc:
129131
ts = <_Timestamp>Timestamp.utcnow()
130132
iresult[0] = ts._as_creso(creso)._value
@@ -135,6 +137,8 @@ cdef bint parse_today_now(
135137
iresult[0] = ts._as_creso(creso)._value
136138
return True
137139
elif val == "today":
140+
if infer_reso:
141+
creso = NPY_DATETIMEUNIT.NPY_FR_us
138142
ts = <_Timestamp>Timestamp.today()
139143
iresult[0] = ts._as_creso(creso)._value
140144
return True
@@ -348,27 +352,33 @@ def array_strptime(
348352
else:
349353
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
350354
state.update_creso(item_reso)
355+
if infer_reso:
356+
creso = state.creso
351357
tz_out = state.process_datetime(val, tz_out, utc)
352358
if isinstance(val, _Timestamp):
353-
val = (<_Timestamp>val)._as_creso(state.creso)
359+
val = (<_Timestamp>val)._as_creso(creso)
354360
iresult[i] = val.tz_localize(None)._value
355361
else:
356362
iresult[i] = pydatetime_to_dt64(
357-
val.replace(tzinfo=None), &dts, reso=state.creso
363+
val.replace(tzinfo=None), &dts, reso=creso
358364
)
359-
check_dts_bounds(&dts, state.creso)
365+
check_dts_bounds(&dts, creso)
360366
result_timezone[i] = val.tzinfo
361367
continue
362368
elif PyDate_Check(val):
363369
item_reso = NPY_DATETIMEUNIT.NPY_FR_s
364370
state.update_creso(item_reso)
365-
iresult[i] = pydate_to_dt64(val, &dts, reso=state.creso)
366-
check_dts_bounds(&dts, state.creso)
371+
if infer_reso:
372+
creso = state.creso
373+
iresult[i] = pydate_to_dt64(val, &dts, reso=creso)
374+
check_dts_bounds(&dts, creso)
367375
continue
368376
elif is_datetime64_object(val):
369377
item_reso = get_supported_reso(get_datetime64_unit(val))
370378
state.update_creso(item_reso)
371-
iresult[i] = get_datetime64_nanos(val, state.creso)
379+
if infer_reso:
380+
creso = state.creso
381+
iresult[i] = get_datetime64_nanos(val, creso)
372382
continue
373383
elif (
374384
(is_integer_object(val) or is_float_object(val))
@@ -394,7 +404,9 @@ def array_strptime(
394404
# where we left off
395405
item_reso = get_supported_reso(out_bestunit)
396406
state.update_creso(item_reso)
397-
value = npy_datetimestruct_to_datetime(state.creso, &dts)
407+
if infer_reso:
408+
creso = state.creso
409+
value = npy_datetimestruct_to_datetime(creso, &dts)
398410
if out_local == 1:
399411
# Store the out_tzoffset in seconds
400412
# since we store the total_seconds of
@@ -404,12 +416,14 @@ def array_strptime(
404416
out_local = 0
405417
out_tzoffset = 0
406418
iresult[i] = value
407-
check_dts_bounds(&dts)
419+
check_dts_bounds(&dts, creso)
408420
continue
409421

410-
if parse_today_now(val, &iresult[i], utc, state.creso):
422+
if parse_today_now(val, &iresult[i], utc, creso, infer_reso=infer_reso):
411423
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
412424
state.update_creso(item_reso)
425+
if infer_reso:
426+
creso = state.creso
413427
continue
414428

415429
# Some ISO formats can't be parsed by string_to_dts
@@ -424,8 +438,10 @@ def array_strptime(
424438
val, fmt, exact, format_regex, locale_time, &dts, &item_reso
425439
)
426440
state.update_creso(item_reso)
427-
iresult[i] = npy_datetimestruct_to_datetime(state.creso, &dts)
428-
check_dts_bounds(&dts)
441+
if infer_reso:
442+
creso = state.creso
443+
iresult[i] = npy_datetimestruct_to_datetime(creso, &dts)
444+
check_dts_bounds(&dts, creso)
429445
result_timezone[i] = tz
430446

431447
except (ValueError, OutOfBoundsDatetime) as ex:

pandas/tests/tslibs/test_strptime.py

+36 -0
Original file line number | Diff line number | Diff line change
@@ -59,3 +59,39 @@ def test_array_strptime_resolution_mixed(self, tz):
5959
fmt = "ISO8601"
6060
res, _ = array_strptime(arr, fmt=fmt, utc=False, creso=creso_infer)
6161
tm.assert_numpy_array_equal(res, expected)
62+
63+
def test_array_strptime_resolution_todaynow(self):
64+
# specifically case where today/now is the *first* item
65+
vals = np.array(["today", np.datetime64("2017-01-01", "us")], dtype=object)
66+
67+
now = Timestamp("now").asm8
68+
res, _ = array_strptime(vals, fmt="%Y-%m-%d", utc=False, creso=creso_infer)
69+
res2, _ = array_strptime(
70+
vals[::-1], fmt="%Y-%m-%d", utc=False, creso=creso_infer
71+
)
72+
73+
# 1s is an arbitrary cutoff for call overhead; in local testing the
74+
# actual difference is about 250us
75+
tolerance = np.timedelta64(1, "s")
76+
77+
assert res.dtype == "M8[us]"
78+
assert abs(res[0] - now) < tolerance
79+
assert res[1] == vals[1]
80+
81+
assert res2.dtype == "M8[us]"
82+
assert abs(res2[1] - now) < tolerance * 2
83+
assert res2[0] == vals[1]
84+
85+
def test_array_strptime_str_outside_nano_range(self):
86+
vals = np.array(["2401-09-15"], dtype=object)
87+
expected = np.array(["2401-09-15"], dtype="M8[s]")
88+
fmt = "ISO8601"
89+
res, _ = array_strptime(vals, fmt=fmt, creso=creso_infer)
90+
tm.assert_numpy_array_equal(res, expected)
91+
92+
# non-iso -> different path
93+
vals2 = np.array(["Sep 15, 2401"], dtype=object)
94+
expected2 = np.array(["2401-09-15"], dtype="M8[s]")
95+
fmt2 = "%b %d, %Y"
96+
res2, _ = array_strptime(vals2, fmt=fmt2, creso=creso_infer)
97+
tm.assert_numpy_array_equal(res2, expected2)

0 commit comments

Comments (0)