From f6d84b94325d2b37872c6a0d670e0e9b1d2e5699 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 18 Nov 2023 13:34:31 +0100 Subject: [PATCH] PERF: Improve conversion in read_csv when string option is set --- pandas/io/parsers/arrow_parser_wrapper.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index a1d69deb6a21e..5d33126593faf 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -267,7 +267,15 @@ def read(self) -> DataFrame: dtype_mapping[pa.null()] = pd.Int64Dtype() frame = table.to_pandas(types_mapper=dtype_mapping.get) elif using_pyarrow_string_dtype(): - frame = table.to_pandas(types_mapper=arrow_string_types_mapper()) + + def types_mapper(dtype): + dtype_dict = self.kwds["dtype"] + if dtype_dict is not None and dtype_dict.get(dtype, None) is not None: + return dtype_dict.get(dtype) + return arrow_string_types_mapper()(dtype) + + frame = table.to_pandas(types_mapper=types_mapper) + else: if isinstance(self.kwds.get("dtype"), dict): frame = table.to_pandas(types_mapper=self.kwds["dtype"].get)