From e082f6dc88d81ca24ce5425a4fb956d5bbc5e6f5 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 18 Feb 2023 19:55:35 +0100 Subject: [PATCH 1/3] BUG: Fix date format identification for dict --- pandas/io/parsers/base_parser.py | 12 +++++++++--- pandas/tests/io/parser/test_parse_dates.py | 7 ++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 090f62b932a2b..8d71ce6dff7f8 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -1244,7 +1244,11 @@ def _isindex(colspec): raise ValueError(f"Date column {new_name} already in dict") _, col, old_names = _try_convert_dates( - converter, colspec, data_dict, orig_names + converter, + colspec, + data_dict, + orig_names, + target_name=new_name, ) new_data[new_name] = col @@ -1268,7 +1272,9 @@ def _isindex(colspec): return data_dict, new_cols -def _try_convert_dates(parser: Callable, colspec, data_dict, columns): +def _try_convert_dates( + parser: Callable, colspec, data_dict, columns, target_name: str | None = None +): colset = set(columns) colnames = [] @@ -1287,7 +1293,7 @@ def _try_convert_dates(parser: Callable, colspec, data_dict, columns): new_name = "_".join([str(x) for x in colnames]) to_parse = [np.asarray(data_dict[c]) for c in colnames if c in data_dict] - new_col = parser(*to_parse, col=new_name) + new_col = parser(*to_parse, col=new_name if target_name is None else target_name) return new_name, new_col, colnames diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 1106f699b80f8..440a8597e14f2 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -2189,9 +2189,10 @@ def test_parse_dates_dict_format_two_columns(all_parsers, key, parse_dates): 31-,12-2019 31-,12-2020""" - result = parser.read_csv( - StringIO(data), date_format={key: "%d- %m-%Y"}, parse_dates=parse_dates - ) + with tm.assert_produces_warning(None): + result = parser.read_csv( + StringIO(data), date_format={key: "%d- %m-%Y"}, parse_dates=parse_dates + ) expected = DataFrame( { key: [Timestamp("2019-12-31"), Timestamp("2020-12-31")], From 1ebb8360e9e0bf483ce82b9ad8a1f396606b1b1e Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 18 Feb 2023 19:59:17 +0100 Subject: [PATCH 2/3] Cleanup docs --- pandas/io/parsers/readers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 85e8dd6043fa5..635c98e38da16 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -262,9 +262,9 @@ more strings (corresponding to the columns defined by `parse_dates`) as arguments. - .. deprecated:: 2.0.0 - Use ``date_format`` instead, or read in as ``object`` and then apply - :func:`to_datetime` as-needed. + .. deprecated:: 2.0.0 + Use ``date_format`` instead, or read in as ``object`` and then apply + :func:`to_datetime` as-needed. date_format : str or dict of column -> format, default ``None`` If used in conjunction with ``parse_dates``, will parse dates according to this format. For anything more complex, From 38895e0dfb85b7867032f241e081c43930653ebe Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 18 Feb 2023 22:00:10 +0100 Subject: [PATCH 3/3] Cleanup docs --- pandas/io/excel/_base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 1f66fd036a1c6..de69cd159723b 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -251,9 +251,9 @@ more strings (corresponding to the columns defined by `parse_dates`) as arguments. - .. deprecated:: 2.0.0 - Use ``date_format`` instead, or read in as ``object`` and then apply - :func:`to_datetime` as-needed. + .. deprecated:: 2.0.0 + Use ``date_format`` instead, or read in as ``object`` and then apply + :func:`to_datetime` as-needed. date_format : str or dict of column -> format, default ``None`` If used in conjunction with ``parse_dates``, will parse dates according to this format. For anything more complex,