diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py index 80af2cff41769..e9776ff2c641e 100644 --- a/asv_bench/benchmarks/io/excel.py +++ b/asv_bench/benchmarks/io/excel.py @@ -11,7 +11,7 @@ def _generate_dataframe(): - N = 2000 + N = 20000 C = 5 df = DataFrame( np.random.randn(N, C), @@ -69,5 +69,9 @@ def time_read_excel(self, engine): fname = self.fname_odf if engine == "odf" else self.fname_excel read_excel(fname, engine=engine) + def nrows_read_excel(self, engine): + fname = self.fname_odf if engine == "odf" else self.fname_excel + read_excel(fname, engine=engine, nrows=1) + from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index ede4fdc5e1d8b..ef3aad6080af9 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1695,7 +1695,9 @@ def _convert_to_ndarrays( result = {} for c, values in dct.items(): conv_f = None if converters is None else converters.get(c, None) - if isinstance(dtypes, dict): + if values.dtype != object: + cast_type = values.dtype + elif isinstance(dtypes, dict): cast_type = dtypes.get(c, None) else: # single dtype or None @@ -3249,6 +3251,9 @@ def _make_date_converter( ): def converter(*date_cols): if date_parser is None: + date_cols = tuple( + x if isinstance(x, np.ndarray) else x.to_numpy() for x in date_cols + ) strs = parsing.concat_date_cols(date_cols) try: diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index a6a9e5c5610f2..e48f205527677 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -18,7 +18,16 @@ from pandas.errors import DtypeWarning, EmptyDataError, ParserError import pandas.util._test_decorators as td -from pandas import DataFrame, Index, MultiIndex, Series, compat, concat, option_context +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + compat, + concat, + option_context, + to_datetime, +) import pandas._testing as tm from pandas.io.parsers import CParserWrapper, TextFileReader, TextParser @@ -2182,6 +2191,20 @@ def test_no_header_two_extra_columns(all_parsers): tm.assert_frame_equal(df, ref) +def test_dtype_with_parse_dates(all_parsers): + # GH 34066 + parser = all_parsers + data = """ +a,b +1,2020-05-23 01:00:00""" + expected = DataFrame( + [["1", "2020-05-23 01:00:00"]], columns=["a", "b"], dtype="string" + ) + expected["b"] = to_datetime(expected["b"]) + result = parser.read_csv(StringIO(data), dtype="string", parse_dates=["b"]) + tm.assert_frame_equal(result, expected) + + def test_read_csv_names_not_accepting_sets(all_parsers): # GH 34946 data = """\