From 45f96d95cccc376c09fd7aac5915a09feddebe0d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 26 Jul 2022 15:25:57 -0700 Subject: [PATCH 1/3] ENH: Allow read_csv(engine=pyarrow) to return ArrowExtensionArray-backed columns --- pandas/io/parsers/arrow_parser_wrapper.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 21e8bb5f9e89f..1b886f9ff5e79 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -5,6 +5,7 @@ from pandas.core.dtypes.inference import is_integer +from pandas.core.arrays.arrow import ArrowExtensionArray from pandas.core.frame import DataFrame from pandas.io.parsers.base_parser import ParserBase @@ -147,6 +148,10 @@ def read(self) -> DataFrame: parse_options=pyarrow_csv.ParseOptions(**self.parse_options), convert_options=pyarrow_csv.ConvertOptions(**self.convert_options), ) - - frame = table.to_pandas() + frame = DataFrame( + { + column_name: ArrowExtensionArray(array) + for column_name, array in zip(table.column_names, table.itercolumns()) + } + ) return self._finalize_output(frame) From a6fffbe86bb666560c76b54d4b79f5e4706f0458 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 27 Jul 2022 11:52:15 -0700 Subject: [PATCH 2/3] Check if tests besides dtypes fail by going to pyarrow --- pandas/_testing/asserters.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index c7924dc451752..3ec75e98b8ffd 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -474,6 +474,14 @@ def assert_attr_equal(attr: str, left, right, obj: str = "Attributes") -> None: left_attr = getattr(left, attr) right_attr = getattr(right, attr) + if attr == "dtype": + from pandas.core.arrays.arrow.dtype import ArrowDtype + + if isinstance(left_attr, ArrowDtype): + left_attr = left_attr.pyarrow_dtype.to_pandas_dtype() + if isinstance(right_attr, ArrowDtype): + right_attr = right_attr.pyarrow_dtype.to_pandas_dtype() + if left_attr is right_attr or is_matching_na(left_attr, right_attr): # e.g. both np.nan, both NaT, both pd.NA, ... return None From e5159e79cdf9db444016c4812918c8b3cf4ba700 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 27 Jul 2022 12:53:50 -0700 Subject: [PATCH 3/3] Revert "Check if tests besides dtypes fail by going to pyarrow" This reverts commit a6fffbe86bb666560c76b54d4b79f5e4706f0458. --- pandas/_testing/asserters.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 3ec75e98b8ffd..c7924dc451752 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -474,14 +474,6 @@ def assert_attr_equal(attr: str, left, right, obj: str = "Attributes") -> None: left_attr = getattr(left, attr) right_attr = getattr(right, attr) - if attr == "dtype": - from pandas.core.arrays.arrow.dtype import ArrowDtype - - if isinstance(left_attr, ArrowDtype): - left_attr = left_attr.pyarrow_dtype.to_pandas_dtype() - if isinstance(right_attr, ArrowDtype): - right_attr = right_attr.pyarrow_dtype.to_pandas_dtype() - if left_attr is right_attr or is_matching_na(left_attr, right_attr): # e.g. both np.nan, both NaT, both pd.NA, ... return None