From 0e968fc70aaa5ef54aeb855dbada1658b11440c7 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 15 Mar 2023 02:10:10 +0100 Subject: [PATCH 1/2] BUG: read_csv for arrow with mismatching dtypes does not work --- pandas/io/parsers/c_parser_wrapper.py | 37 ++++--------------- .../io/parser/test_concatenate_chunks.py | 36 ++++++++++++++++++ 2 files changed, 43 insertions(+), 30 deletions(-) create mode 100644 pandas/tests/io/parser/test_concatenate_chunks.py diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index 4b8bc5c402157..2b24e4e873a92 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -23,8 +23,10 @@ is_categorical_dtype, pandas_dtype, ) -from pandas.core.dtypes.concat import union_categoricals -from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.concat import ( + concat_compat, + union_categoricals, +) from pandas.core.indexes.api import ensure_index_from_sequences @@ -379,40 +381,15 @@ def _concatenate_chunks(chunks: list[dict[int, ArrayLike]]) -> dict: arrs = [chunk.pop(name) for chunk in chunks] # Check each arr for consistent types. dtypes = {a.dtype for a in arrs} - # TODO: shouldn't we exclude all EA dtypes here? numpy_dtypes = {x for x in dtypes if not is_categorical_dtype(x)} - if len(numpy_dtypes) > 1: - # error: Argument 1 to "find_common_type" has incompatible type - # "Set[Any]"; expected "Sequence[Union[dtype[Any], None, type, - # _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, - # Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]]" - common_type = np.find_common_type( - numpy_dtypes, # type: ignore[arg-type] - [], - ) - if common_type == np.dtype(object): - warning_columns.append(str(name)) dtype = dtypes.pop() if is_categorical_dtype(dtype): result[name] = union_categoricals(arrs, sort_categories=False) - elif isinstance(dtype, ExtensionDtype): - # TODO: concat_compat? 
- array_type = dtype.construct_array_type() - # error: Argument 1 to "_concat_same_type" of "ExtensionArray" - # has incompatible type "List[Union[ExtensionArray, ndarray]]"; - # expected "Sequence[ExtensionArray]" - result[name] = array_type._concat_same_type(arrs) # type: ignore[arg-type] else: - # error: Argument 1 to "concatenate" has incompatible - # type "List[Union[ExtensionArray, ndarray[Any, Any]]]" - # ; expected "Union[_SupportsArray[dtype[Any]], - # Sequence[_SupportsArray[dtype[Any]]], - # Sequence[Sequence[_SupportsArray[dtype[Any]]]], - # Sequence[Sequence[Sequence[_SupportsArray[dtype[Any]]]]] - # , Sequence[Sequence[Sequence[Sequence[ - # _SupportsArray[dtype[Any]]]]]]]" - result[name] = np.concatenate(arrs) # type: ignore[arg-type] + result[name] = concat_compat(arrs) + if len(numpy_dtypes) > 1 and result[name].dtype == np.dtype(object): + warning_columns.append(str(name)) if warning_columns: warning_names = ",".join(warning_columns) diff --git a/pandas/tests/io/parser/test_concatenate_chunks.py b/pandas/tests/io/parser/test_concatenate_chunks.py new file mode 100644 index 0000000000000..1bae2317a2fc6 --- /dev/null +++ b/pandas/tests/io/parser/test_concatenate_chunks.py @@ -0,0 +1,36 @@ +import numpy as np +import pytest + +from pandas.errors import DtypeWarning + +import pandas._testing as tm +from pandas.core.arrays import ArrowExtensionArray + +from pandas.io.parsers.c_parser_wrapper import _concatenate_chunks + + +def test_concatenate_chunks_pyarrow(): + # GH#51876 + pa = pytest.importorskip("pyarrow") + chunks = [ + {0: ArrowExtensionArray(pa.array([1.5, 2.5]))}, + {0: ArrowExtensionArray(pa.array([1, 2]))}, + ] + result = _concatenate_chunks(chunks) + expected = ArrowExtensionArray(pa.array([1.5, 2.5, 1.0, 2.0])) + tm.assert_extension_array_equal(result[0], expected) + + +def test_concatenate_chunks_pyarrow_strings(): + # GH#51876 + pa = pytest.importorskip("pyarrow") + chunks = [ + {0: ArrowExtensionArray(pa.array([1.5, 2.5]))}, + {0: ArrowExtensionArray(pa.array(["a", "b"]))}, + ] + with tm.assert_produces_warning(DtypeWarning, match="have mixed types"): + result = _concatenate_chunks(chunks) + expected = np.concatenate( + [np.array([1.5, 2.5], dtype=object), np.array(["a", "b"])] + ) + tm.assert_numpy_array_equal(result[0], expected) From 559bda4f4265731c589a454ee2d17e07e4b27c3b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 15 Mar 2023 16:57:05 +0100 Subject: [PATCH 2/2] Rename var --- pandas/io/parsers/c_parser_wrapper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index 2b24e4e873a92..3d3e343050421 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -381,14 +381,14 @@ def _concatenate_chunks(chunks: list[dict[int, ArrayLike]]) -> dict: arrs = [chunk.pop(name) for chunk in chunks] # Check each arr for consistent types. dtypes = {a.dtype for a in arrs} - numpy_dtypes = {x for x in dtypes if not is_categorical_dtype(x)} + non_cat_dtypes = {x for x in dtypes if not is_categorical_dtype(x)} dtype = dtypes.pop() if is_categorical_dtype(dtype): result[name] = union_categoricals(arrs, sort_categories=False) else: result[name] = concat_compat(arrs) - if len(numpy_dtypes) > 1 and result[name].dtype == np.dtype(object): + if len(non_cat_dtypes) > 1 and result[name].dtype == np.dtype(object): warning_columns.append(str(name)) if warning_columns:
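
A minimal sketch of the behavior this series exercises, mirroring the new test in test_concatenate_chunks.py. It assumes pandas with pyarrow installed and calls the private helper _concatenate_chunks directly, so it is illustrative rather than a public API; the expected float64[pyarrow] result comes from the test's expectation, not from additional guarantees.

    import pyarrow as pa

    from pandas.core.arrays import ArrowExtensionArray
    from pandas.io.parsers.c_parser_wrapper import _concatenate_chunks

    # Two chunks of the same column (key 0) with mismatching Arrow dtypes:
    # float64 in the first chunk, int64 in the second.
    chunks = [
        {0: ArrowExtensionArray(pa.array([1.5, 2.5]))},
        {0: ArrowExtensionArray(pa.array([1, 2]))},
    ]

    # The removed ExtensionDtype branch popped a single dtype and used
    # ExtensionArray._concat_same_type, which assumes all chunks share one
    # dtype; routing through concat_compat instead lets mismatching dtypes
    # be reconciled, here to a single float64[pyarrow] column
    # [1.5, 2.5, 1.0, 2.0].
    result = _concatenate_chunks(chunks)
    print(result[0].dtype)

When the chunk dtypes cannot be reconciled (e.g. floats in one chunk, strings in the next, as in test_concatenate_chunks_pyarrow_strings), concat_compat falls back to an object-dtype result and the existing DtypeWarning path now reports the column, since the warning check moved after concatenation and keys off the resulting dtype.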