Skip to content

Commit 0f9adf8

Browse files
Khor Chean Wei and mroeschke authored
ENH: Allow parameter min_periods in DataFrame.corrwith() (#58231)
* Testing * Testing * enhance test case * add test * testing * add * add test * enhance * add * add * add * add * add * add * enhance * enhance * enhance * Update doc/source/whatsnew/v3.0.0.rst Co-authored-by: Matthew Roeschke <[email protected]> * test * Update test_cov_corr.py --------- Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 66cfd80 commit 0f9adf8

File tree

4 files changed

+36
-1
lines changed

4 files changed

+36
-1
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ Other enhancements
3939
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
4040
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
4141
- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
42+
- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as an optional argument, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
4243
- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
4344
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
4445
- :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)

pandas/core/frame.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -11132,6 +11132,7 @@ def corrwith(
1113211132
drop: bool = False,
1113311133
method: CorrelationMethod = "pearson",
1113411134
numeric_only: bool = False,
11135+
min_periods: int | None = None,
1113511136
) -> Series:
1113611137
"""
1113711138
Compute pairwise correlation.
@@ -11162,6 +11163,9 @@ def corrwith(
1116211163
numeric_only : bool, default False
1116311164
Include only `float`, `int` or `boolean` data.
1116411165
11166+
min_periods : int, optional
11167+
Minimum number of observations needed to have a valid result.
11168+
1116511169
.. versionadded:: 1.5.0
1116611170
1116711171
.. versionchanged:: 2.0.0
@@ -11205,7 +11209,10 @@ def corrwith(
1120511209
this = self._get_numeric_data() if numeric_only else self
1120611210

1120711211
if isinstance(other, Series):
11208-
return this.apply(lambda x: other.corr(x, method=method), axis=axis)
11212+
return this.apply(
11213+
lambda x: other.corr(x, method=method, min_periods=min_periods),
11214+
axis=axis,
11215+
)
1120911216

1121011217
if numeric_only:
1121111218
other = other._get_numeric_data()

pandas/tests/frame/methods/test_cov_corr.py

+25
Original file line numberDiff line numberDiff line change
@@ -461,3 +461,28 @@ def test_corrwith_spearman_with_tied_data(self):
461461
result = df_bool.corrwith(ser_bool)
462462
expected = Series([0.57735, 0.57735], index=["A", "B"])
463463
tm.assert_series_equal(result, expected)
464+
465+
def test_corrwith_min_periods_method(self):
466+
# GH#9490
467+
pytest.importorskip("scipy")
468+
df1 = DataFrame(
469+
{
470+
"A": [1, np.nan, 7, 8],
471+
"B": [False, True, True, False],
472+
"C": [10, 4, 9, 3],
473+
}
474+
)
475+
df2 = df1[["B", "C"]]
476+
result = (df1 + 1).corrwith(df2.B, method="spearman", min_periods=2)
477+
expected = Series([0.0, 1.0, 0.0], index=["A", "B", "C"])
478+
tm.assert_series_equal(result, expected)
479+
480+
def test_corrwith_min_periods_boolean(self):
481+
# GH#9490
482+
df_bool = DataFrame(
483+
{"A": [True, True, False, False], "B": [True, False, False, True]}
484+
)
485+
ser_bool = Series([True, True, False, True])
486+
result = df_bool.corrwith(ser_bool, min_periods=3)
487+
expected = Series([0.57735, 0.57735], index=["A", "B"])
488+
tm.assert_series_equal(result, expected)

pandas/tests/groupby/test_api.py

+2
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,8 @@ def test_frame_consistency(groupby_func):
192192
exclude_expected = {"numeric_only"}
193193
elif groupby_func in ("quantile",):
194194
exclude_expected = {"method", "axis"}
195+
elif groupby_func in ["corrwith"]:
196+
exclude_expected = {"min_periods"}
195197
if groupby_func not in ["pct_change", "size"]:
196198
exclude_expected |= {"axis"}
197199

0 commit comments

Comments
 (0)