Skip to content

Commit e4b930a

Browse files
authored
REF: Eliminate another iterchunks call in ArrowExtensionArray (#46448)
1 parent 583c8da commit e4b930a

File tree

1 file changed

+11
-44
lines changed

1 file changed

+11
-44
lines changed

pandas/core/arrays/_mixins.py

+11-44
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
from typing import (
55
TYPE_CHECKING,
66
Any,
7-
Iterator,
87
Literal,
98
Sequence,
109
TypeVar,
@@ -855,65 +854,33 @@ def _set_via_chunk_iteration(
855854
"""
856855
Loop through the array chunks and set the new values while
857856
leaving the chunking layout unchanged.
858-
"""
859-
chunk_indices = self._indices_to_chunk_indices(indices)
860-
new_data = list(self._data.iterchunks())
861-
862-
for i, c_ind in enumerate(chunk_indices):
863-
n = len(c_ind)
864-
if n == 0:
865-
continue
866-
c_value, value = value[:n], value[n:]
867-
new_data[i] = self._replace_with_indices(new_data[i], c_ind, c_value)
868-
869-
return pa.chunked_array(new_data)
870-
871-
def _indices_to_chunk_indices(
872-
self, indices: npt.NDArray[np.intp]
873-
) -> Iterator[npt.NDArray[np.intp]]:
874-
"""
875-
Convert *sorted* indices for self into a list of ndarrays
876-
each containing the indices *within* each chunk of the
877-
underlying ChunkedArray.
878857
879858
Parameters
880859
----------
881860
indices : npt.NDArray[np.intp]
882861
Position indices for the underlying ChunkedArray.
883862
884-
Returns
885-
-------
886-
Generator yielding positional indices for each chunk
863+
value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
864+
value or values to be set at the positions given by ``indices``.
887865
888866
Notes
889867
-----
890868
Assumes that indices is sorted. Caller is responsible for sorting.
891869
"""
892-
for start, stop in self._chunk_positional_ranges():
870+
new_data = []
871+
stop = 0
872+
for chunk in self._data.iterchunks():
873+
start, stop = stop, stop + len(chunk)
893874
if len(indices) == 0 or stop <= indices[0]:
894-
yield np.array([], dtype=np.intp)
875+
new_data.append(chunk)
895876
else:
896877
n = int(np.searchsorted(indices, stop, side="left"))
897878
c_ind = indices[:n] - start
898879
indices = indices[n:]
899-
yield c_ind
900-
901-
def _chunk_positional_ranges(self) -> tuple[tuple[int, int], ...]:
902-
"""
903-
Return a tuple of tuples each containing the left (inclusive)
904-
and right (exclusive) positional bounds of each chunk's values
905-
within the underlying ChunkedArray.
906-
907-
Returns
908-
-------
909-
tuple[tuple]
910-
"""
911-
ranges = []
912-
stop = 0
913-
for c in self._data.iterchunks():
914-
start, stop = stop, stop + len(c)
915-
ranges.append((start, stop))
916-
return tuple(ranges)
880+
n = len(c_ind)
881+
c_value, value = value[:n], value[n:]
882+
new_data.append(self._replace_with_indices(chunk, c_ind, c_value))
883+
return pa.chunked_array(new_data)
917884

918885
@classmethod
919886
def _replace_with_indices(

0 commit comments

Comments
 (0)