Skip to content

Commit cb61cea

Browse files
author
MarcoGorelli
committed
check for nat_strings when finding first null
1 parent 90b4add commit cb61cea

File tree

3 files changed

+20
-6
lines changed

3 files changed

+20
-6
lines changed

pandas/_libs/tslib.pyx

+14
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,20 @@ def array_with_unit_to_datetime(
421421

422422
return oresult, tz
423423

424+
def first_non_null(values: ndarray):
    """
    Return the first element of ``values`` that is not treated as null.

    An element is skipped when ``checknull_with_nat_and_na`` reports it as
    null, or when it is a string that is either empty or listed in
    ``nat_strings``.

    Returns
    -------
    object or None
        The first element that is not skipped, or None if every element
        is skipped (including when ``values`` is empty).
    """
    cdef:
        Py_ssize_t idx
        Py_ssize_t length = len(values)

    for idx in range(length):
        candidate = values[idx]
        if checknull_with_nat_and_na(candidate):
            continue
        # Empty strings and NaT-like strings are treated as missing too.
        is_null_string = isinstance(candidate, str) and (
            len(candidate) == 0 or candidate in nat_strings
        )
        if not is_null_string:
            return candidate
    return None
424438

425439
@cython.wraparound(False)
426440
@cython.boundscheck(False)

pandas/core/tools/datetimes.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,9 @@ class FulldatetimeDict(YearMonthDayDict, total=False):
126126

127127
def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False) -> str | None:
128128
# Guess the datetime format from the first non-null element of `arr`.
# Returns None when no such element is found or when that element is not
# exactly of type `str`; otherwise returns the result of
# `guess_datetime_format` for that element (with `dayfirst` passed through).
129-
non_nan_elements = notna(arr).nonzero()[0]
130-
if len(non_nan_elements):
131-
if type(first_non_nan_element := arr[non_nan_elements[0]]) is str:
132-
# GH#32264 np.str_ object
133-
return guess_datetime_format(first_non_nan_element, dayfirst=dayfirst)
129+
if type(first_non_nan_element := tslib.first_non_null(arr)) is str:
130+
# GH#32264: exact `type(...) is str` check, so a str subclass such as
# np.str_ does not match and falls through to `return None`
131+
return guess_datetime_format(first_non_nan_element, dayfirst=dayfirst)
134132
return None
135133

136134

pandas/tests/tools/test_to_datetime.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -2093,7 +2093,7 @@ def test_to_datetime_dta_tz(self, klass):
20932093

20942094

20952095
class TestGuessDatetimeFormat:
2096-
@td.skip_if_not_us_locale
2096+
# @td.skip_if_not_us_locale
20972097
@pytest.mark.parametrize(
20982098
"test_array",
20992099
[
@@ -2103,6 +2103,8 @@ class TestGuessDatetimeFormat:
21032103
"2011-12-30 00:00:00.000000",
21042104
],
21052105
[np.nan, np.nan, "2011-12-30 00:00:00.000000"],
2106+
["", "2011-12-30 00:00:00.000000"],
2107+
["NaT", "2011-12-30 00:00:00.000000"],
21062108
["2011-12-30 00:00:00.000000", "random_string"],
21072109
],
21082110
)

0 commit comments

Comments
 (0)