diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index afe63b6785524..1fc2f1041e2ea 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -39,6 +39,7 @@ Other enhancements - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`) - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`) - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`) +- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as an optional argument, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`) - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`) - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`) - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9fbbc2c08efaa..181024b5b6b6e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11127,6 +11127,7 @@ def corrwith( drop: bool = False, method: CorrelationMethod = "pearson", numeric_only: bool = False, + min_periods: int | None = None, ) -> Series: """ Compute pairwise correlation. @@ -11157,6 +11158,9 @@ def corrwith( numeric_only : bool, default False Include only `float`, `int` or `boolean` data. + min_periods : int, optional + Minimum number of observations needed to have a valid result. + .. versionadded:: 1.5.0 .. 
versionchanged:: 2.0.0 @@ -11200,7 +11204,10 @@ def corrwith( this = self._get_numeric_data() if numeric_only else self if isinstance(other, Series): - return this.apply(lambda x: other.corr(x, method=method), axis=axis) + return this.apply( + lambda x: other.corr(x, method=method, min_periods=min_periods), + axis=axis, + ) if numeric_only: other = other._get_numeric_data() diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index 4d2d83d25e8da..53aa44f264c7a 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -461,3 +461,28 @@ def test_corrwith_spearman_with_tied_data(self): result = df_bool.corrwith(ser_bool) expected = Series([0.57735, 0.57735], index=["A", "B"]) tm.assert_series_equal(result, expected) + + def test_corrwith_min_periods_method(self): + # GH#9490 + pytest.importorskip("scipy") + df1 = DataFrame( + { + "A": [1, np.nan, 7, 8], + "B": [False, True, True, False], + "C": [10, 4, 9, 3], + } + ) + df2 = df1[["B", "C"]] + result = (df1 + 1).corrwith(df2.B, method="spearman", min_periods=2) + expected = Series([0.0, 1.0, 0.0], index=["A", "B", "C"]) + tm.assert_series_equal(result, expected) + + def test_corrwith_min_periods_boolean(self): + # GH#9490 + df_bool = DataFrame( + {"A": [True, True, False, False], "B": [True, False, False, True]} + ) + ser_bool = Series([True, True, False, True]) + result = df_bool.corrwith(ser_bool, min_periods=3) + expected = Series([0.57735, 0.57735], index=["A", "B"]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_api.py b/pandas/tests/groupby/test_api.py index d2cfa530e7c65..33b39bad4ab81 100644 --- a/pandas/tests/groupby/test_api.py +++ b/pandas/tests/groupby/test_api.py @@ -192,6 +192,8 @@ def test_frame_consistency(groupby_func): exclude_expected = {"numeric_only"} elif groupby_func in ("quantile",): exclude_expected = {"method", "axis"} + elif groupby_func in ["corrwith"]: + 
exclude_expected = {"min_periods"} if groupby_func not in ["pct_change", "size"]: exclude_expected |= {"axis"}