Skip to content

Commit e4097ca

Browse files
PERF/CLN: let pyarrow concat chunks instead of doing it ourselves in __from_arrow__ (pandas-dev#52928)
* PERF: let pyarrow concat chunks instead of doing it ourselves in __from_arrow__
* workaround for empty chunked arrays for older pyarrow
1 parent c632485 commit e4097ca

File tree

1 file changed

+10
-21
lines changed

1 file changed

+10
-21
lines changed

pandas/core/arrays/numeric.py

+10-21
Original file line numberDiff line numberDiff line change
@@ -88,27 +88,16 @@ def __from_arrow__(
8888

8989
array = array.cast(pyarrow_type)
9090

91-
if isinstance(array, pyarrow.Array):
92-
chunks = [array]
93-
else:
94-
# pyarrow.ChunkedArray
95-
chunks = array.chunks
96-
97-
results = []
98-
for arr in chunks:
99-
data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=self.numpy_dtype)
100-
num_arr = array_class(data.copy(), ~mask, copy=False)
101-
results.append(num_arr)
102-
103-
if not results:
104-
return array_class(
105-
np.array([], dtype=self.numpy_dtype), np.array([], dtype=np.bool_)
106-
)
107-
elif len(results) == 1:
108-
# avoid additional copy in _concat_same_type
109-
return results[0]
110-
else:
111-
return array_class._concat_same_type(results)
91+
if isinstance(array, pyarrow.ChunkedArray):
92+
# TODO this "if" can be removed when requiring pyarrow >= 10.0, which fixed
93+
# combine_chunks for empty arrays https://github.com/apache/arrow/pull/13757
94+
if array.num_chunks == 0:
95+
array = pyarrow.array([], type=array.type)
96+
else:
97+
array = array.combine_chunks()
98+
99+
data, mask = pyarrow_array_to_numpy_and_mask(array, dtype=self.numpy_dtype)
100+
return array_class(data.copy(), ~mask, copy=False)
112101

113102
@classmethod
114103
def _str_to_dtype_mapping(cls) -> Mapping[str, NumericDtype]:

0 commit comments

Comments
 (0)