Skip to content

Commit 0a4440d

Browse files
authored
PERF: BaseMaskedArray.__iter__ (#49851)
* BaseMaskedArray.__iter__ perf
* fix
* gh ref
* clarify whatsnew
1 parent 253daaa commit 0a4440d

File tree

3 files changed

+44
-6
lines changed

3 files changed

+44
-6
lines changed

asv_bench/benchmarks/series_methods.py

+33
Original file line numberDiff line numberDiff line change
@@ -348,4 +348,37 @@ def time_rank(self, dtype):
348348
self.s.rank()
349349

350350

351+
class Iter:
    """Benchmark plain Python iteration over a Series for several dtypes.

    Each NumPy dtype is paired with its nullable counterpart
    ("bool"/"boolean", "int64"/"Int64", "float64"/"Float64"), and
    "datetime64[ns]" is included as well.
    """

    param_names = ["dtype"]
    params = [
        "bool",
        "boolean",
        "int64",
        "Int64",
        "float64",
        "Float64",
        "datetime64[ns]",
    ]

    def setup(self, dtype):
        """Build ``self.s``, a Series of 10**5 values with the given dtype.

        Raises
        ------
        NotImplementedError
            If no sample data is defined for ``dtype``.
        """
        N = 10**5
        if dtype in ["bool", "boolean"]:
            data = np.repeat([True, False], N // 2)
        elif dtype in ["int64", "Int64"]:
            data = np.arange(N)
        elif dtype in ["float64", "Float64"]:
            data = np.random.randn(N)
        elif dtype == "datetime64[ns]":
            data = date_range("2000-01-01", freq="s", periods=N)
        else:
            # Name the rejected dtype so a bad ``params`` entry is easy to spot.
            raise NotImplementedError(
                f"no benchmark data defined for dtype {dtype!r}"
            )

        self.s = Series(data, dtype=dtype)

    def time_iter(self, dtype):
        """Time one full pass over the Series; the values are discarded."""
        for _ in self.s:
            pass
382+
383+
351384
from .pandas_vb_common import setup # noqa: F401 isort:skip

doc/source/whatsnew/v2.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -602,7 +602,7 @@ Performance improvements
602602
- Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`)
603603
- Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`)
604604
- Performance improvement in ``var`` for nullable dtypes (:issue:`48379`).
605-
- Performance improvement when iterating over a :class:`~arrays.ArrowExtensionArray` (:issue:`49825`).
605+
- Performance improvement when iterating over pyarrow and nullable dtypes (:issue:`49825`, :issue:`49851`)
606606
- Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47405`, :issue:`47656`, :issue:`48502`)
607607
- Memory improvement in :meth:`RangeIndex.sort_values` (:issue:`48801`)
608608
- Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``sort=False`` (:issue:`48976`)

pandas/core/arrays/masked.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -247,11 +247,16 @@ def __setitem__(self, key, value) -> None:
247247

248248
def __iter__(self) -> Iterator:
    """Yield the array's elements one at a time.

    For a 1-dimensional array, yields each entry of ``self._data``, with
    ``self.dtype.na_value`` substituted wherever the matching entry of
    ``self._mask`` is truthy. For any other ``ndim``, yields ``self[i]``
    for each ``i`` in ``range(len(self))``.
    """
    if self.ndim == 1:
        if not self._hasna:
            # ``_hasna`` is falsy (presumably: no entry is masked), so the
            # mask is not consulted and the data is yielded directly.
            yield from self._data
        else:
            # Look the NA value up once instead of once per masked element.
            na_value = self.dtype.na_value
            for isna_, val in zip(self._mask, self._data):
                if isna_:
                    yield na_value
                else:
                    yield val
    else:
        for i in range(len(self)):
            yield self[i]

0 commit comments

Comments
 (0)