Skip to content

Commit 13cd542

Browse files
authored
Backport PR #51741 on branch 2.0.x (BUG: indexing empty pyarrow backed object returning corrupt object) (#51841)
1 parent 6d5804b commit 13cd542

File tree

4 files changed

+16
-1
lines changed

4 files changed

+16
-1
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1247,6 +1247,7 @@ Indexing
12471247
- Bug in :meth:`DataFrame.compare` not recognizing differences when comparing ``NA`` with a value in nullable dtypes (:issue:`48939`)
12481248
- Bug in :meth:`Series.rename` with :class:`MultiIndex` losing extension array dtypes (:issue:`21055`)
12491249
- Bug in :meth:`DataFrame.isetitem` coercing extension array dtypes in :class:`DataFrame` to object (:issue:`49922`)
1250+
- Bug in :meth:`Series.__getitem__` returning a corrupt object when selecting from an empty pyarrow-backed object (:issue:`51734`)
12501251
- Bug in :class:`BusinessHour` would cause creation of :class:`DatetimeIndex` to fail when no opening hour was included in the index (:issue:`49835`)
12511252

12521253
Missing

pandas/_testing/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@
252252
else:
253253
FLOAT_PYARROW_DTYPES_STR_REPR = []
254254
ALL_INT_PYARROW_DTYPES_STR_REPR = []
255+
ALL_PYARROW_DTYPES = []
255256

256257

257258
EMPTY_STRING_PATTERN = re.compile("^$")

pandas/core/arrays/arrow/array.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1010,7 +1010,12 @@ def _concat_same_type(
10101010
ArrowExtensionArray
10111011
"""
10121012
chunks = [array for ea in to_concat for array in ea._data.iterchunks()]
1013-
arr = pa.chunked_array(chunks)
1013+
if to_concat[0].dtype == "string":
1014+
# StringDtype has no attribute pyarrow_dtype
1015+
pa_dtype = pa.string()
1016+
else:
1017+
pa_dtype = to_concat[0].dtype.pyarrow_dtype
1018+
arr = pa.chunked_array(chunks, type=pa_dtype)
10141019
return cls(arr)
10151020

10161021
def _accumulate(

pandas/tests/extension/test_arrow.py

+8
Original file line numberDiff line numberDiff line change
@@ -2298,3 +2298,11 @@ def test_dt_tz_localize(unit):
22982298
dtype=ArrowDtype(pa.timestamp(unit, "US/Pacific")),
22992299
)
23002300
tm.assert_series_equal(result, expected)
2301+
2302+
2303+
def test_concat_empty_arrow_backed_series(dtype):
2304+
# GH#51734
2305+
ser = pd.Series([], dtype=dtype)
2306+
expected = ser.copy()
2307+
result = pd.concat([ser[np.array([], dtype=np.bool_)]])
2308+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)