Skip to content

Commit 4e807a2

Browse files
BUG/TST: fix arrow roundtrip / parquet tests for recent pyarrow (#30077)
1 parent 080e331 commit 4e807a2

File tree

5 files changed

+44
-7
lines changed

5 files changed

+44
-7
lines changed

pandas/core/arrays/boolean.py

+18
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,24 @@ def __repr__(self) -> str:
103103
def _is_boolean(self) -> bool:
104104
return True
105105

106+
def __from_arrow__(self, array):
107+
"""Construct BooleanArray from passed pyarrow Array/ChunkedArray"""
108+
import pyarrow
109+
110+
if isinstance(array, pyarrow.Array):
111+
chunks = [array]
112+
else:
113+
# pyarrow.ChunkedArray
114+
chunks = array.chunks
115+
116+
results = []
117+
for arr in chunks:
118+
# TODO should optimize this without going through object array
119+
bool_arr = BooleanArray._from_sequence(np.array(arr))
120+
results.append(bool_arr)
121+
122+
return BooleanArray._concat_same_type(results)
123+
106124

107125
def coerce_to_array(values, mask=None, copy: bool = False):
108126
"""

pandas/core/arrays/string_.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def __from_arrow__(self, array):
8686

8787
results = []
8888
for arr in chunks:
89-
# using _from_sequence to ensure None is convered to np.nan
89+
# using _from_sequence to ensure None is converted to NA
9090
str_arr = StringArray._from_sequence(np.array(arr))
9191
results.append(str_arr)
9292

@@ -208,7 +208,10 @@ def __arrow_array__(self, type=None):
208208

209209
if type is None:
210210
type = pa.string()
211-
return pa.array(self._ndarray, type=type, from_pandas=True)
211+
212+
values = self._ndarray.copy()
213+
values[self.isna()] = None
214+
return pa.array(values, type=type, from_pandas=True)
212215

213216
def _values_for_factorize(self):
214217
arr = self._ndarray.copy()

pandas/tests/arrays/string_/test_string.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -235,5 +235,5 @@ def test_arrow_roundtrip():
235235
result = table.to_pandas()
236236
assert isinstance(result["a"].dtype, pd.StringDtype)
237237
tm.assert_frame_equal(result, df)
238-
# ensure the missing value is represented by NaN and not None
239-
assert np.isnan(result.loc[2, "a"])
238+
# ensure the missing value is represented by NA and not np.nan or None
239+
assert result.loc[2, "a"] is pd.NA

pandas/tests/arrays/test_boolean.py

+19-2
Original file line numberDiff line numberDiff line change
@@ -757,12 +757,29 @@ def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
757757
# result = arr[mask]
758758

759759

760-
@pytest.mark.skip(reason="broken test")
761760
@td.skip_if_no("pyarrow", min_version="0.15.0")
762761
def test_arrow_array(data):
763762
# protocol added in 0.15.0
764763
import pyarrow as pa
765764

766765
arr = pa.array(data)
767-
expected = pa.array(np.array(data, dtype=object), type=pa.bool_(), from_pandas=True)
766+
767+
# TODO use to_numpy(na_value=None) here
768+
data_object = np.array(data, dtype=object)
769+
data_object[data.isna()] = None
770+
expected = pa.array(data_object, type=pa.bool_(), from_pandas=True)
768771
assert arr.equals(expected)
772+
773+
774+
@td.skip_if_no("pyarrow", min_version="0.15.1.dev")
775+
def test_arrow_roundtrip():
776+
# roundtrip possible from arrow 1.0.0
777+
import pyarrow as pa
778+
779+
data = pd.array([True, False, None], dtype="boolean")
780+
df = pd.DataFrame({"a": data})
781+
table = pa.table(df)
782+
assert table.field("a").type == "bool"
783+
result = table.to_pandas()
784+
assert isinstance(result["a"].dtype, pd.BooleanDtype)
785+
tm.assert_frame_equal(result, df)

pandas/tests/io/test_parquet.py

-1
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,6 @@ def test_write_with_schema(self, pa):
525525
out_df = df.astype(bool)
526526
check_round_trip(df, pa, write_kwargs={"schema": schema}, expected=out_df)
527527

528-
@pytest.mark.skip(reason="broken test")
529528
@td.skip_if_no("pyarrow", min_version="0.15.0")
530529
def test_additional_extension_arrays(self, pa):
531530
# test additional ExtensionArrays that are supported through the

0 commit comments

Comments
 (0)