diff --git a/pandas/_libs/tslibs/parsing.pyi b/pandas/_libs/tslibs/parsing.pyi index db1388672b37c..c7244447edaf7 100644 --- a/pandas/_libs/tslibs/parsing.pyi +++ b/pandas/_libs/tslibs/parsing.pyi @@ -23,7 +23,7 @@ def _does_string_look_like_datetime(py_string: str) -> bool: ... def quarter_to_myear(year: int, quarter: int, freq: str) -> tuple[int, int]: ... def try_parse_dates( values: npt.NDArray[np.object_], # object[:] - parser=..., + parser, dayfirst: bool = ..., default: datetime | None = ..., ) -> npt.NDArray[np.object_]: ... diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 992e1d90f4f3b..6d834c5494a83 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -661,7 +661,7 @@ cdef dateutil_parse( def try_parse_dates( - object[:] values, parser=None, bint dayfirst=False, default=None, + object[:] values, parser, bint dayfirst=False, default=None, ) -> np.ndarray: cdef: Py_ssize_t i, n @@ -670,32 +670,11 @@ def try_parse_dates( n = len(values) result = np.empty(n, dtype="O") - if parser is None: - if default is None: # GH2618 - date = datetime.now() - default = datetime(date.year, date.month, 1) - - def parse_date(x): - return du_parse(x, dayfirst=dayfirst, default=default) - - # EAFP here - try: - for i in range(n): - if values[i] == "": - result[i] = np.nan - else: - result[i] = parse_date(values[i]) - except Exception: - # Since parser is user-defined, we can't guess what it might raise - return values - else: - parse_date = parser - - for i in range(n): - if values[i] == "": - result[i] = np.nan - else: - result[i] = parse_date(values[i]) + for i in range(n): + if values[i] == "": + result[i] = np.nan + else: + result[i] = parser(values[i]) return result.base # .base to access underlying ndarray diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 9fdb1380e14eb..f38666ac52529 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -1121,19 +1121,13 @@ def converter(*date_cols): if date_parser is None: strs = parsing.concat_date_cols(date_cols) - try: - return tools.to_datetime( - ensure_object(strs), - utc=False, - dayfirst=dayfirst, - errors="ignore", - cache=cache_dates, - ).to_numpy() - - except ValueError: - return tools.to_datetime( - parsing.try_parse_dates(strs, dayfirst=dayfirst), cache=cache_dates - ) + return tools.to_datetime( + ensure_object(strs), + utc=False, + dayfirst=dayfirst, + errors="ignore", + cache=cache_dates, + ).to_numpy() else: try: result = tools.to_datetime( diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index bb1619c3d7bde..080410b0c913a 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -165,7 +165,9 @@ def date_parser(*date_cols): ------- parsed : Series """ - return parsing.try_parse_dates(parsing.concat_date_cols(date_cols)) + return parsing.try_parse_dates( + parsing.concat_date_cols(date_cols), parser=du_parse + ) kwds = { "header": None, @@ -2026,3 +2028,31 @@ def test_parse_dates_and_string_dtype(all_parsers): expected = DataFrame({"a": ["1"], "b": [Timestamp("2019-12-31")]}) expected["a"] = expected["a"].astype("string") tm.assert_frame_equal(result, expected) + + +def test_parse_dot_separated_dates(all_parsers): + # https://github.com/pandas-dev/pandas/issues/2586 + parser = all_parsers + data = """a,b +27.03.2003 14:55:00.000,1 +03.08.2003 15:20:00.000,2""" + if parser.engine == "pyarrow": + expected_index = Index( + ["27.03.2003 14:55:00.000", "03.08.2003 15:20:00.000"], + dtype="object", + name="a", + ) + warn = None + else: + expected_index = DatetimeIndex( + ["2003-03-27 14:55:00", "2003-08-03 15:20:00"], + dtype="datetime64[ns]", + name="a", + ) + warn = UserWarning + msg = "when dayfirst=False was specified" + result = parser.read_csv_check_warnings( + warn, msg, StringIO(data), parse_dates=True, index_col=0 + ) + expected = DataFrame({"b": [1, 2]}, index=expected_index) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 2cfb7393ed96c..7675305e27d22 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -4,7 +4,7 @@ from datetime import datetime import re -from dateutil.parser import parse +from dateutil.parser import parse as du_parse import numpy as np import pytest @@ -271,9 +271,11 @@ def test_guess_datetime_format_no_padding(string, fmt, dayfirst, warning): def test_try_parse_dates(): arr = np.array(["5/1/2000", "6/1/2000", "7/1/2000"], dtype=object) - result = parsing.try_parse_dates(arr, dayfirst=True) + result = parsing.try_parse_dates( + arr, dayfirst=True, parser=lambda x: du_parse(x, dayfirst=True) + ) - expected = np.array([parse(d, dayfirst=True) for d in arr]) + expected = np.array([du_parse(d, dayfirst=True) for d in arr]) tm.assert_numpy_array_equal(result, expected)