Patch for GH#53295, restored to one diff line per physical line.

What the hunks below change:
  * doc/source/whatsnew/v2.0.2.rst
      Adds a release note about read_csv raising OverflowError with
      engine="pyarrow" and parse_dates set; removes one blank line ahead of
      the section separator.
  * pandas/io/parsers/base_parser.py
      converter() now returns its single input column untouched when that
      column's dtype.kind is "M" or "m", before any string-based parsing.
      NOTE(review): presumably this covers data the pyarrow engine has
      already turned into datetime-like values -- confirm against the issue.
  * pandas/tests/io/parser/test_parse_dates.py
      Adds test_parse_dates_arrow_engine, which takes the all_parsers argument
      and checks that column "a" is parsed into Timestamps.

NOTE(review): the indentation of context and added lines was reconstructed
from the hunk line counts and Python syntax; run `git apply --check` against
the target tree before relying on this file.

diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst
index 8713979331afd..a39d68a2f8ae9 100644
--- a/doc/source/whatsnew/v2.0.2.rst
+++ b/doc/source/whatsnew/v2.0.2.rst
@@ -29,6 +29,7 @@ Bug fixes
 - Bug in :func:`api.interchange.from_dataframe` was returning :class:`DataFrame`'s of incorrect sizes when called on slices (:issue:`52824`)
 - Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`)
 - Bug in :func:`merge` when merging on datetime columns on different resolutions (:issue:`53200`)
+- Bug in :func:`read_csv` raising ``OverflowError`` for ``engine="pyarrow"`` and ``parse_dates`` set (:issue:`53295`)
 - Bug in :func:`to_datetime` was inferring format to contain ``"%H"`` instead of ``"%I"`` if date contained "AM" / "PM" tokens (:issue:`53147`)
 - Bug in :meth:`DataFrame.convert_dtypes` ignores ``convert_*`` keywords when set to False ``dtype_backend="pyarrow"`` (:issue:`52872`)
 - Bug in :meth:`DataFrame.sort_values` raising for PyArrow ``dictionary`` dtype (:issue:`53232`)
@@ -37,7 +38,6 @@ Bug fixes
 - Bug in :meth:`pd.array` raising for ``NumPy`` array and ``pa.large_string`` or ``pa.large_binary`` (:issue:`52590`)
 - Bug in :meth:`DataFrame.__getitem__` not preserving dtypes for :class:`MultiIndex` partial keys (:issue:`51895`)
 -
 
-
 .. ---------------------------------------------------------------------------
 .. _whatsnew_202.other:
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index f354fe9a53b48..2db759719fcb4 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -1120,6 +1120,9 @@ def unpack_if_single_element(arg):
         return arg
 
     def converter(*date_cols, col: Hashable):
+        if len(date_cols) == 1 and date_cols[0].dtype.kind in "Mm":
+            return date_cols[0]
+
         if date_parser is lib.no_default:
             strs = parsing.concat_date_cols(date_cols)
             date_fmt = (
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
index 8c3474220cde8..94f4066ea1cb2 100644
--- a/pandas/tests/io/parser/test_parse_dates.py
+++ b/pandas/tests/io/parser/test_parse_dates.py
@@ -2218,3 +2218,23 @@ def test_parse_dates_dict_format_index(all_parsers):
         index=Index([Timestamp("2019-12-31"), Timestamp("2020-12-31")], name="a"),
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_parse_dates_arrow_engine(all_parsers):
+    # GH#53295
+    parser = all_parsers
+    data = """a,b
+2000-01-01 00:00:00,1
+2000-01-01 00:00:01,1"""
+
+    result = parser.read_csv(StringIO(data), parse_dates=["a"])
+    expected = DataFrame(
+        {
+            "a": [
+                Timestamp("2000-01-01 00:00:00"),
+                Timestamp("2000-01-01 00:00:01"),
+            ],
+            "b": 1,
+        }
+    )
+    tm.assert_frame_equal(result, expected)