Skip to content

Commit 3790452

Browse files
authored
BUG: pd.array raising with NumPy array and large dtype (#52591)
* BUG: pd.array raising with NumPy array and large dtype * Fix * Add gh ref * Move
1 parent 46ac5b8 commit 3790452

File tree

3 files changed

+25
-0
lines changed

3 files changed

+25
-0
lines changed

doc/source/whatsnew/v2.0.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ Bug fixes
2222
~~~~~~~~~
2323
- Bug in :func:`api.interchange.from_dataframe` was returning :class:`DataFrame`'s of incorrect sizes when called on slices (:issue:`52824`)
2424
- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`)
25+
- Bug in :func:`array` raising when passed a NumPy array together with an :class:`ArrowDtype` of ``pa.large_string`` or ``pa.large_binary`` (:issue:`52590`)
2526
-
2627

2728
.. ---------------------------------------------------------------------------

pandas/core/arrays/arrow/array.py

+10
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,16 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
245245
Construct a new ExtensionArray from a sequence of scalars.
246246
"""
247247
pa_dtype = to_pyarrow_type(dtype)
248+
if (
249+
isinstance(scalars, np.ndarray)
250+
and isinstance(dtype, ArrowDtype)
251+
and (
252+
pa.types.is_large_binary(pa_dtype) or pa.types.is_large_string(pa_dtype)
253+
)
254+
):
255+
# See https://github.com/apache/arrow/issues/35289
256+
scalars = scalars.tolist()
257+
248258
if isinstance(scalars, cls):
249259
scalars = scalars._pa_array
250260
elif not isinstance(scalars, (pa.Array, pa.ChunkedArray)):

pandas/tests/extension/test_arrow.py

+14
Original file line numberDiff line numberDiff line change
@@ -2802,6 +2802,20 @@ def test_setitem_boolean_replace_with_mask_segfault():
28022802
assert arr._pa_array == expected._pa_array
28032803

28042804

2805+
@pytest.mark.parametrize(
2806+
"data, arrow_dtype",
2807+
[
2808+
([b"a", b"b"], pa.large_binary()),
2809+
(["a", "b"], pa.large_string()),
2810+
],
2811+
)
2812+
def test_conversion_large_dtypes_from_numpy_array(data, arrow_dtype):
2813+
dtype = ArrowDtype(arrow_dtype)
2814+
result = pd.array(np.array(data), dtype=dtype)
2815+
expected = pd.array(data, dtype=dtype)
2816+
tm.assert_extension_array_equal(result, expected)
2817+
2818+
28052819
@pytest.mark.parametrize("pa_type", tm.ALL_INT_PYARROW_DTYPES + tm.FLOAT_PYARROW_DTYPES)
28062820
def test_describe_numeric_data(pa_type):
28072821
# GH 52470

0 commit comments

Comments
 (0)