Skip to content

DOC: fix PR02 errors in docstring for pandas.core.groupby.DataFrameGroupBy.corrwith #57243

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 6, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
pandas.core.groupby.SeriesGroupBy.rolling\
pandas.core.groupby.DataFrameGroupBy.hist\
pandas.core.groupby.DataFrameGroupBy.plot\
pandas.core.groupby.DataFrameGroupBy.corrwith\
pandas.core.groupby.SeriesGroupBy.plot # There should be no backslash in the final line, please keep this comment in the last ignored function
RET=$(($RET + $?)) ; echo $MSG "DONE"

Expand Down
8 changes: 5 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11135,8 +11135,10 @@ def corrwith(
--------
>>> index = ["a", "b", "c", "d", "e"]
>>> columns = ["one", "two", "three", "four"]
>>> df1 = pd.DataFrame(np.arange(20).reshape(5, 4), index=index, columns=columns)
>>> df2 = pd.DataFrame(np.arange(16).reshape(4, 4), index=index[:4], columns=columns)
>>> df1 = pd.DataFrame(np.arange(20).reshape(5, 4),
... index=index, columns=columns)
>>> df2 = pd.DataFrame(np.arange(16).reshape(4, 4),
... index=index[:4], columns=columns)
>>> df1.corrwith(df2)
one 1.0
two 1.0
Expand All @@ -11151,7 +11153,7 @@ def corrwith(
d 1.0
e NaN
dtype: float64
""" # noqa: E501
"""
axis = self._get_axis_number(axis)
this = self._get_numeric_data() if numeric_only else self

Expand Down
67 changes: 66 additions & 1 deletion pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2491,14 +2491,79 @@ def dtypes(self) -> Series:
lambda df: df.dtypes, self._selected_obj
)

@doc(DataFrame.corrwith.__doc__)
def corrwith(
self,
other: DataFrame | Series,
drop: bool = False,
method: CorrelationMethod = "pearson",
numeric_only: bool = False,
) -> DataFrame:
"""
Compute pairwise correlation.

Pairwise correlation is computed between rows or columns of
DataFrame with rows or columns of Series or DataFrame. DataFrames
are first aligned along both axes before computing the
correlations.

Parameters
----------
other : DataFrame, Series
Object with which to compute correlations.
drop : bool, default False
Drop missing indices from result.
method : {'pearson', 'kendall', 'spearman'} or callable
Method of correlation:

* pearson : standard correlation coefficient
* kendall : Kendall Tau correlation coefficient
* spearman : Spearman rank correlation
* callable : callable that takes two 1d ndarrays as input
and returns a float.

numeric_only : bool, default False
Include only `float`, `int` or `boolean` data.

.. versionadded:: 1.5.0

.. versionchanged:: 2.0.0
The default value of ``numeric_only`` is now ``False``.

Returns
-------
DataFrame
Pairwise correlations.

See Also
--------
DataFrame.corr : Compute pairwise correlation of columns.

Examples
--------
>>> df1 = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3],
... "B": [2.6, 2.7, 2.0, 2.3, 2.9, 2.8]})
>>> df2 = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3],
... "B": [2.4, 2.4, 2.1, 2.6, 2.5, 2.1]})
>>> x = df1.groupby("A").first()
>>> x
B
A
1 2.6
2 2.0
3 2.9

>>> y = df2.groupby("A").first()
>>> y
B
A
1 2.4
2 2.1
3 2.5

>>> x.corrwith(y)
B 0.995871
dtype: float64
"""
result = self._op_via_apply(
"corrwith",
other=other,
Expand Down