Skip to content

Commit 9e30b01

Browse files
committed
BUG: indexing empty pyarrow backed object returning corrupt object (pandas-dev#51741)
1 parent 6d5804b commit 9e30b01

File tree

4 files changed

+46
-1
lines changed

4 files changed

+46
-1
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1247,6 +1247,7 @@ Indexing
12471247
- Bug in :meth:`DataFrame.compare` does not recognize differences when comparing ``NA`` with value in nullable dtypes (:issue:`48939`)
12481248
- Bug in :meth:`Series.rename` with :class:`MultiIndex` losing extension array dtypes (:issue:`21055`)
12491249
- Bug in :meth:`DataFrame.isetitem` coercing extension array dtypes in :class:`DataFrame` to object (:issue:`49922`)
1250+
- Bug in :meth:`Series.__getitem__` returning corrupt object when selecting from an empty pyarrow backed object (:issue:`51734`)
12501251
- Bug in :class:`BusinessHour` would cause creation of :class:`DatetimeIndex` to fail when no opening hour was included in the index (:issue:`49835`)
12511252

12521253
Missing

pandas/_testing/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@
252252
else:
253253
FLOAT_PYARROW_DTYPES_STR_REPR = []
254254
ALL_INT_PYARROW_DTYPES_STR_REPR = []
255+
ALL_PYARROW_DTYPES = []
255256

256257

257258
EMPTY_STRING_PATTERN = re.compile("^$")

pandas/core/arrays/arrow/array.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1010,7 +1010,12 @@ def _concat_same_type(
10101010
ArrowExtensionArray
10111011
"""
10121012
chunks = [array for ea in to_concat for array in ea._data.iterchunks()]
1013-
arr = pa.chunked_array(chunks)
1013+
if to_concat[0].dtype == "string":
1014+
# StringDtype has no attribute pyarrow_dtype
1015+
pa_dtype = pa.string()
1016+
else:
1017+
pa_dtype = to_concat[0].dtype.pyarrow_dtype
1018+
arr = pa.chunked_array(chunks, type=pa_dtype)
10141019
return cls(arr)
10151020

10161021
def _accumulate(

pandas/tests/extension/test_arrow.py

+38
Original file line numberDiff line numberDiff line change
@@ -2298,3 +2298,41 @@ def test_dt_tz_localize(unit):
22982298
dtype=ArrowDtype(pa.timestamp(unit, "US/Pacific")),
22992299
)
23002300
tm.assert_series_equal(result, expected)
2301+
2302+
2303+
@pytest.mark.parametrize("skipna", [True, False])
2304+
def test_boolean_reduce_series_all_null(all_boolean_reductions, skipna):
2305+
# GH51624
2306+
ser = pd.Series([None], dtype="float64[pyarrow]")
2307+
result = getattr(ser, all_boolean_reductions)(skipna=skipna)
2308+
if skipna:
2309+
expected = all_boolean_reductions == "all"
2310+
else:
2311+
expected = pd.NA
2312+
assert result is expected
2313+
2314+
2315+
def test_from_sequence_of_strings_boolean():
2316+
true_strings = ["true", "TRUE", "True", "1", "1.0"]
2317+
false_strings = ["false", "FALSE", "False", "0", "0.0"]
2318+
nulls = [None]
2319+
strings = true_strings + false_strings + nulls
2320+
bools = (
2321+
[True] * len(true_strings) + [False] * len(false_strings) + [None] * len(nulls)
2322+
)
2323+
2324+
result = ArrowExtensionArray._from_sequence_of_strings(strings, dtype=pa.bool_())
2325+
expected = pd.array(bools, dtype="boolean[pyarrow]")
2326+
tm.assert_extension_array_equal(result, expected)
2327+
2328+
strings = ["True", "foo"]
2329+
with pytest.raises(pa.ArrowInvalid, match="Failed to parse"):
2330+
ArrowExtensionArray._from_sequence_of_strings(strings, dtype=pa.bool_())
2331+
2332+
2333+
def test_concat_empty_arrow_backed_series(dtype):
2334+
# GH#51734
2335+
ser = pd.Series([], dtype=dtype)
2336+
expected = ser.copy()
2337+
result = pd.concat([ser[np.array([], dtype=np.bool_)]])
2338+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)