Skip to content

Commit 6b396cf

Browse files
authored
PERF: Add var to masked arrays (#48379)
1 parent 86967da commit 6b396cf

File tree

4 files changed

+45
-4
lines changed

4 files changed

+45
-4
lines changed

doc/source/whatsnew/v1.6.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ Performance improvements
108108
- Performance improvement for :meth:`Series.value_counts` with nullable dtype (:issue:`48338`)
109109
- Performance improvement for :class:`Series` constructor passing integer numpy array with nullable dtype (:issue:`48338`)
110110
- Performance improvement for :meth:`MultiIndex.unique` (:issue:`48335`)
111+
- Performance improvement in ``var`` for nullable dtypes (:issue:`48379`)
111112
- Performance improvement to :func:`read_sas` with ``blank_missing=True`` (:issue:`48502`)
112113
-
113114

pandas/core/array_algos/masked_reductions.py

+19-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def _reductions(
2222
skipna: bool = True,
2323
min_count: int = 0,
2424
axis: int | None = None,
25+
**kwargs,
2526
):
2627
"""
2728
Sum, mean, product or variance for 1D masked array.
@@ -45,14 +46,14 @@ def _reductions(
4546
if mask.any(axis=axis) or check_below_min_count(values.shape, None, min_count):
4647
return libmissing.NA
4748
else:
48-
return func(values, axis=axis)
49+
return func(values, axis=axis, **kwargs)
4950
else:
5051
if check_below_min_count(values.shape, mask, min_count) and (
5152
axis is None or values.ndim == 1
5253
):
5354
return libmissing.NA
5455

55-
return func(values, where=~mask, axis=axis)
56+
return func(values, where=~mask, axis=axis, **kwargs)
5657

5758

5859
def sum(
@@ -149,3 +150,19 @@ def mean(
149150
if not values.size or mask.all():
150151
return libmissing.NA
151152
return _reductions(np.mean, values=values, mask=mask, skipna=skipna, axis=axis)
153+
154+
155+
def var(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    *,
    skipna: bool = True,
    axis: int | None = None,
    ddof: int = 1,
):
    """
    Variance of the values of a masked array.

    Parameters
    ----------
    values : np.ndarray
        Array holding the data to reduce.
    mask : np.ndarray[bool]
        Boolean array passed alongside ``values``; the reduction is run with
        ``where=~mask``, so True entries are excluded.
    skipna : bool, default True
        Forwarded unchanged to ``_reductions``.
    axis : int or None, default None
        Forwarded unchanged to ``_reductions``.
    ddof : int, default 1
        Delta degrees of freedom, forwarded to ``np.var``.

    Returns
    -------
    ``libmissing.NA`` when ``values`` is empty or every entry is masked,
    otherwise the result of ``_reductions`` applied with ``np.var``.
    """
    # Local import keeps this block self-contained; ``warnings`` is stdlib.
    import warnings

    if not values.size or mask.all():
        return libmissing.NA

    # np.var raises a RuntimeWarning ("Degrees of freedom <= 0") when the
    # number of unmasked values does not exceed ``ddof``. The numeric result
    # is returned unchanged; only the warning noise is silenced here.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        return _reductions(
            np.var, values=values, mask=mask, skipna=skipna, axis=axis, ddof=ddof
        )

pandas/core/arrays/masked.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -1028,7 +1028,7 @@ def _quantile(
10281028
# Reductions
10291029

10301030
def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
1031-
if name in {"any", "all", "min", "max", "sum", "prod", "mean"}:
1031+
if name in {"any", "all", "min", "max", "sum", "prod", "mean", "var"}:
10321032
return getattr(self, name)(skipna=skipna, **kwargs)
10331033

10341034
data = self._data
@@ -1106,6 +1106,19 @@ def mean(self, *, skipna=True, axis: int | None = 0, **kwargs):
11061106
"mean", result, skipna=skipna, axis=axis, **kwargs
11071107
)
11081108

1109+
def var(self, *, skipna=True, axis: int | None = 0, ddof: int = 1, **kwargs):
    """
    Reduce this masked array with ``masked_reductions.var``.

    Extra keyword arguments are checked with
    ``nv.validate_stat_ddof_func`` before the reduction runs. The raw
    result is then handed to ``_wrap_reduction_result`` together with
    ``skipna``, ``axis`` and the remaining keyword arguments.
    """
    nv.validate_stat_ddof_func((), kwargs, fname="var")

    # Reduce over the data/mask pair, honouring skipna, axis and ddof.
    variance = masked_reductions.var(
        self._data, self._mask, skipna=skipna, axis=axis, ddof=ddof
    )

    wrap_kwargs = {"skipna": skipna, "axis": axis, **kwargs}
    return self._wrap_reduction_result("var", variance, **wrap_kwargs)
1121+
11091122
def min(self, *, skipna=True, axis: int | None = 0, **kwargs):
11101123
nv.validate_min((), kwargs)
11111124
return masked_reductions.min(

pandas/tests/reductions/test_reductions.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -696,7 +696,7 @@ def test_empty_multi(self, method, unit):
696696
expected = Series([1, np.nan], index=["a", "b"])
697697
tm.assert_series_equal(result, expected)
698698

699-
@pytest.mark.parametrize("method", ["mean"])
699+
@pytest.mark.parametrize("method", ["mean", "var"])
700700
@pytest.mark.parametrize("dtype", ["Float64", "Int64", "boolean"])
701701
def test_ops_consistency_on_empty_nullable(self, method, dtype):
702702

@@ -787,6 +787,16 @@ def test_mean_masked_overflow(self):
787787
assert result_masked - result_numpy == 0
788788
assert result_masked == 1e17
789789

790+
@pytest.mark.parametrize("ddof, exp", [(1, 2.5), (0, 2.0)])
def test_var_masked_array(self, ddof, exp):
    # GH#48379
    # The nullable Int64 path must agree with the plain int64 path and with
    # the known variance of 1..5 for the given ddof.
    data = [1, 2, 3, 4, 5]
    masked_var = Series(data, dtype="Int64").var(ddof=ddof)
    numpy_var = Series(data, dtype="int64").var(ddof=ddof)
    assert masked_var == numpy_var
    assert masked_var == exp
799+
790800
@pytest.mark.parametrize("dtype", ("m8[ns]", "m8[ns]", "M8[ns]", "M8[ns, UTC]"))
791801
@pytest.mark.parametrize("skipna", [True, False])
792802
def test_empty_timeseries_reductions_return_nat(self, dtype, skipna):

0 commit comments

Comments
 (0)