From d19ff87fb4a9c58134b67d08a1c9db33cf1532c5 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Tue, 11 Apr 2023 12:04:23 +0200
Subject: [PATCH 1/4] BUG: pd.array raising with NumPy array and large dtype

---
 doc/source/whatsnew/v2.0.1.rst       |  1 +
 pandas/core/arrays/arrow/array.py    |  5 +++++
 pandas/tests/extension/test_arrow.py | 14 ++++++++++++++
 3 files changed, 20 insertions(+)

diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst
index a4a58811e382f..8e4fdfaca177d 100644
--- a/doc/source/whatsnew/v2.0.1.rst
+++ b/doc/source/whatsnew/v2.0.1.rst
@@ -28,6 +28,7 @@ Bug fixes
 - Fixed segfault in :meth:`Series.to_numpy` with ``null[pyarrow]`` dtype (:issue:`52443`)
 - Bug in :func:`pandas.testing.assert_series_equal` where ``check_dtype=False`` would still raise for datetime or timedelta types with different resolutions (:issue:`52449`)
 - Bug in :meth:`ArrowDtype.__from_arrow__` not respecting if dtype is explicitly given (:issue:`52533`)
+- Bug in :meth:`pd.array` raising for ``NumPy`` array and ``pa.large_string`` or ``pa.large_binary`` (:issue:`52590`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_201.other:
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 82fc826b81a51..456263c969840 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -241,6 +241,11 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
         Construct a new ExtensionArray from a sequence of scalars.
         """
         pa_dtype = to_pyarrow_type(dtype)
+        if isinstance(scalars, np.ndarray) and (
+            pa.types.is_large_binary(pa_dtype) or pa.types.is_large_string(pa_dtype)
+        ):
+            scalars = scalars.tolist()
+
         if isinstance(scalars, cls):
             scalars = scalars._pa_array
         elif not isinstance(scalars, (pa.Array, pa.ChunkedArray)):
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 2b606c2c5e711..e4dce05310b5b 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -2423,6 +2423,20 @@ def test_setitem_boolean_replace_with_mask_segfault():
     assert arr._pa_array == expected._pa_array
 
 
+@pytest.mark.parametrize(
+    "data, arrow_dtype",
+    [
+        ([b"a", b"b"], pa.large_binary()),
+        (["a", "b"], pa.large_string()),
+    ],
+)
+def test_conversion_large_dtypes_from_numpy_array(data, arrow_dtype):
+    dtype = ArrowDtype(arrow_dtype)
+    result = pd.array(np.array(data), dtype=dtype)
+    expected = pd.array(data, dtype=dtype)
+    tm.assert_extension_array_equal(result, expected)
+
+
 @pytest.mark.parametrize("pa_type", tm.ALL_INT_PYARROW_DTYPES + tm.FLOAT_PYARROW_DTYPES)
 def test_describe_numeric_data(pa_type):
     # GH 52470

From abcfb92f32e85d471d65ba484efe545eecca7ff0 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Tue, 11 Apr 2023 12:40:58 +0200
Subject: [PATCH 2/4] Only apply large dtype workaround when dtype is an ArrowDtype

---
 pandas/core/arrays/arrow/array.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 456263c969840..af4b9bd29ab61 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -241,8 +241,12 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
         Construct a new ExtensionArray from a sequence of scalars.
         """
         pa_dtype = to_pyarrow_type(dtype)
-        if isinstance(scalars, np.ndarray) and (
-            pa.types.is_large_binary(pa_dtype) or pa.types.is_large_string(pa_dtype)
+        if (
+            isinstance(scalars, np.ndarray)
+            and isinstance(dtype, ArrowDtype)
+            and (
+                pa.types.is_large_binary(pa_dtype) or pa.types.is_large_string(pa_dtype)
+            )
         ):
             scalars = scalars.tolist()
 

From eb30082a77f45488b07c759df39b845defe50d37 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 23 Apr 2023 11:54:15 +0200
Subject: [PATCH 3/4] Add comment referencing the upstream Arrow issue

---
 pandas/core/arrays/arrow/array.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 586283059fa11..fb54cf1f0a3e0 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -252,6 +252,7 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
                 pa.types.is_large_binary(pa_dtype) or pa.types.is_large_string(pa_dtype)
             )
         ):
+            # See https://github.com/apache/arrow/issues/35289
             scalars = scalars.tolist()
 
         if isinstance(scalars, cls):

From 30a6917fa37395676aa78bdfcda4a1c0b5469483 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 25 Apr 2023 22:05:15 +0200
Subject: [PATCH 4/4] Move whatsnew entry from v2.0.1 to v2.0.2

---
 doc/source/whatsnew/v2.0.1.rst | 1 -
 doc/source/whatsnew/v2.0.2.rst | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst
index 24c380c71bceb..2613d12e43400 100644
--- a/doc/source/whatsnew/v2.0.1.rst
+++ b/doc/source/whatsnew/v2.0.1.rst
@@ -43,7 +43,6 @@ Bug fixes
 - Bug in :meth:`DataFrame.max` and related casting different :class:`Timestamp` resolutions always to nanoseconds (:issue:`52524`)
 - Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`)
 - Bug in :meth:`Series.dt.tz_localize` incorrectly localizing timestamps with :class:`ArrowDtype` (:issue:`52677`)
-- Bug in :meth:`pd.array` raising for ``NumPy`` array and ``pa.large_string`` or ``pa.large_binary`` (:issue:`52590`)
 - Bug in arithmetic between ``np.datetime64`` and ``np.timedelta64`` ``NaT`` scalars with units always returning nanosecond resolution (:issue:`52295`)
 - Bug in logical and comparison operations between :class:`ArrowDtype` and numpy masked types (e.g. ``"boolean"``) (:issue:`52625`)
 - Fixed bug in :func:`merge` when merging with ``ArrowDtype`` one one and a NumPy dtype on the other side (:issue:`52406`)
diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst
index 09932a2d2d571..f6b0b4086cb39 100644
--- a/doc/source/whatsnew/v2.0.2.rst
+++ b/doc/source/whatsnew/v2.0.2.rst
@@ -22,6 +22,7 @@ Bug fixes
 ~~~~~~~~~
 - Bug in :func:`api.interchange.from_dataframe` was returning :class:`DataFrame`'s of incorrect sizes when called on slices (:issue:`52824`)
 - Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`)
+- Bug in :func:`array` raising when passed a NumPy array with ``pa.large_string`` or ``pa.large_binary`` dtype (:issue:`52590`)
 -
 
 .. ---------------------------------------------------------------------------