Skip to content

Commit a2608c4

Browse files
author
MarcoGorelli
committed
check for nat_strings when finding first non-null
1 parent eec9837 commit a2608c4

File tree

3 files changed

+24
-17
lines changed

3 files changed

+24
-17
lines changed

pandas/_libs/tslib.pyx

+7-4
Original file line number | Diff line number | Diff line change
@@ -421,20 +421,23 @@ def array_with_unit_to_datetime(
421421

422422
return oresult, tz
423423

424-
def first_non_null(values: ndarray):
425-
"""Find first non-null value, return None if there isn't one."""
424+
@cython.wraparound(False)
425+
@cython.boundscheck(False)
426+
def first_non_null(values: ndarray) -> int:
427+
"""Find position of first non-null value, return -1 if there isn't one."""
426428
cdef:
427429
Py_ssize_t n = len(values)
428430
Py_ssize_t i
431+
int result
429432
for i in range(n):
430433
val = values[i]
431434
if checknull_with_nat_and_na(val):
432435
continue
433436
if isinstance(val, str) and (len(val) == 0 or val in nat_strings):
434437
continue
435-
return val
438+
return i
436439
else:
437-
return None
440+
return -1
438441

439442
@cython.wraparound(False)
440443
@cython.boundscheck(False)

pandas/core/tools/datetimes.py

+13-10
Original file line number | Diff line number | Diff line change
@@ -129,16 +129,19 @@ class FulldatetimeDict(YearMonthDayDict, total=False):
129129

130130
def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False) -> str | None:
131131
# Try to guess the format based on the first non-NaN element, return None if can't
132-
if type(first_non_nan_element := tslib.first_non_null(arr)) is str:
133-
# GH#32264 np.str_ object
134-
guessed_format = guess_datetime_format(first_non_nan_element, dayfirst=dayfirst)
135-
if guessed_format is not None:
136-
return guessed_format
137-
warnings.warn(
138-
"Could not infer format - "
139-
"to ensure consistent parsing, specify a format.",
140-
stacklevel=find_stack_level(),
141-
)
132+
if (first_non_null := tslib.first_non_null(arr)) != -1:
133+
if type(first_non_nan_element := arr[first_non_null]) is str:
134+
# GH#32264 np.str_ object
135+
guessed_format = guess_datetime_format(
136+
first_non_nan_element, dayfirst=dayfirst
137+
)
138+
if guessed_format is not None:
139+
return guessed_format
140+
warnings.warn(
141+
"Could not infer format - "
142+
"to ensure consistent parsing, specify a format.",
143+
stacklevel=find_stack_level(),
144+
)
142145
return None
143146

144147

pandas/tests/tools/test_to_datetime.py

+4-3
Original file line number | Diff line number | Diff line change
@@ -2100,9 +2100,9 @@ def test_to_datetime_dta_tz(self, klass):
21002100

21012101

21022102
class TestGuessDatetimeFormat:
2103-
# @td.skip_if_not_us_locale
2103+
@td.skip_if_not_us_locale
21042104
@pytest.mark.parametrize(
2105-
"test_array",
2105+
"test_list",
21062106
[
21072107
[
21082108
"2011-12-30 00:00:00.000000",
@@ -2115,8 +2115,9 @@ class TestGuessDatetimeFormat:
21152115
["2011-12-30 00:00:00.000000", "random_string"],
21162116
],
21172117
)
2118-
def test_guess_datetime_format_for_array(self, test_array):
2118+
def test_guess_datetime_format_for_array(self, test_list):
21192119
expected_format = "%Y-%m-%d %H:%M:%S.%f"
2120+
test_array = np.array(test_list, dtype=object)
21202121
assert tools._guess_datetime_format_for_array(test_array) == expected_format
21212122

21222123
@td.skip_if_not_us_locale

0 commit comments

Comments
 (0)