Skip to content

Commit 91757c5

Browse files
Backport PR #52591 on branch 2.0.x (BUG: pd.array raising with NumPy array and large dtype) (#52951)
Backport PR #52591: BUG: pd.array raising with NumPy array and large dtype Co-authored-by: Patrick Hoefler <[email protected]>
1 parent cab4cf4 commit 91757c5

File tree

3 files changed

+25
-0
lines changed

3 files changed

+25
-0
lines changed

doc/source/whatsnew/v2.0.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ Bug fixes
2222
~~~~~~~~~
2323
- Bug in :func:`api.interchange.from_dataframe` was returning :class:`DataFrame` objects of incorrect sizes when called on slices (:issue:`52824`)
2424
- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`)
25+
- Bug in :func:`pd.array` raising when passed a ``NumPy`` array with a ``pa.large_string`` or ``pa.large_binary`` dtype (:issue:`52590`)
2526
-
2627

2728
.. ---------------------------------------------------------------------------

pandas/core/arrays/arrow/array.py

+10
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,16 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
247247
Construct a new ExtensionArray from a sequence of scalars.
248248
"""
249249
pa_dtype = to_pyarrow_type(dtype)
250+
if (
251+
isinstance(scalars, np.ndarray)
252+
and isinstance(dtype, ArrowDtype)
253+
and (
254+
pa.types.is_large_binary(pa_dtype) or pa.types.is_large_string(pa_dtype)
255+
)
256+
):
257+
# See https://github.com/apache/arrow/issues/35289
258+
scalars = scalars.tolist()
259+
250260
if isinstance(scalars, cls):
251261
scalars = scalars._data
252262
elif not isinstance(scalars, (pa.Array, pa.ChunkedArray)):

pandas/tests/extension/test_arrow.py

+14
Original file line numberDiff line numberDiff line change
@@ -2620,6 +2620,20 @@ def test_setitem_boolean_replace_with_mask_segfault():
26202620
assert arr._data == expected._data
26212621

26222622

2623+
@pytest.mark.parametrize(
2624+
"data, arrow_dtype",
2625+
[
2626+
([b"a", b"b"], pa.large_binary()),
2627+
(["a", "b"], pa.large_string()),
2628+
],
2629+
)
2630+
def test_conversion_large_dtypes_from_numpy_array(data, arrow_dtype):
2631+
dtype = ArrowDtype(arrow_dtype)
2632+
result = pd.array(np.array(data), dtype=dtype)
2633+
expected = pd.array(data, dtype=dtype)
2634+
tm.assert_extension_array_equal(result, expected)
2635+
2636+
26232637
@pytest.mark.parametrize("pa_type", tm.ALL_INT_PYARROW_DTYPES + tm.FLOAT_PYARROW_DTYPES)
26242638
def test_describe_numeric_data(pa_type):
26252639
# GH 52470

0 commit comments

Comments
 (0)