Skip to content

Commit 7d58ce6

Browse files
theandygross authored and
WillAyd committed
DOC: add documentation to core.window.corr (#20268)
1 parent 13febab commit 7d58ce6

File tree

1 file changed

+102
-10
lines changed

1 file changed

+102
-10
lines changed

pandas/core/window.py

+102-10
Original file line numberDiff line numberDiff line change
@@ -1391,19 +1391,113 @@ def _get_cov(X, Y):
13911391
_get_cov, pairwise=bool(pairwise))
13921392

13931393
_shared_docs['corr'] = dedent("""
1394-
%(name)s sample correlation
1394+
Calculate %(name)s correlation.
13951395
13961396
Parameters
13971397
----------
13981398
other : Series, DataFrame, or ndarray, optional
1399-
if not supplied then will default to self and produce pairwise output
1399+
If not supplied then will default to self.
14001400
pairwise : bool, default None
1401-
If False then only matching columns between self and other will be
1402-
used and the output will be a DataFrame.
1403-
If True then all pairwise combinations will be calculated and the
1404-
output will be a MultiIndex DataFrame in the case of DataFrame inputs.
1405-
In the case of missing elements, only complete pairwise observations
1406-
will be used.""")
1401+
Calculate pairwise combinations of columns within a
1402+
DataFrame. If `other` is not specified, defaults to `True`,
1403+
otherwise defaults to `False`.
1404+
Not relevant for :class:`~pandas.Series`.
1405+
**kwargs
1406+
Under Review.
1407+
1408+
Returns
1409+
-------
1410+
Series or DataFrame
1411+
Returned object type is determined by the caller of the
1412+
%(name)s calculation.
1413+
1414+
See Also
1415+
--------
1416+
Series.%(name)s : Calling object with Series data.
1417+
DataFrame.%(name)s : Calling object with DataFrame data.
1418+
Series.corr : Equivalent method for Series.
1419+
DataFrame.corr : Equivalent method for DataFrame.
1420+
%(name)s.cov : Similar method to calculate covariance.
1421+
numpy.corrcoef : NumPy Pearson's correlation calculation.
1422+
1423+
Notes
1424+
-----
1425+
This function uses Pearson's definition of correlation
1426+
(https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).
1427+
1428+
When `other` is not specified, the output will be self-correlation (i.e.
1429+
all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
1430+
set to `True`.
1431+
1432+
Function will return `NaN` for correlations of equal-valued sequences;
1433+
this is the result of a 0/0 division error.
1434+
1435+
When `pairwise` is set to `False`, only matching columns between `self` and
1436+
`other` will be used.
1437+
1438+
When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
1439+
with the original index on the first level, and the `other` DataFrame
1440+
columns on the second level.
1441+
1442+
In the case of missing elements, only complete pairwise observations
1443+
will be used.
1444+
1445+
Examples
1446+
--------
1447+
The example below shows a rolling calculation with a window size of
1448+
four matching the equivalent function call using `numpy.corrcoef`.
1449+
1450+
>>> v1 = [3, 3, 3, 5, 8]
1451+
>>> v2 = [3, 4, 4, 4, 8]
1452+
>>> fmt = "{0:.6f}" # limit the printed precision to 6 digits
1453+
>>> # numpy returns a 2x2 array, the correlation coefficient
1454+
>>> # is the number at entry [0][1]
1455+
>>> print(fmt.format(np.corrcoef(v1[:-1], v2[:-1])[0][1]))
1456+
0.333333
1457+
>>> print(fmt.format(np.corrcoef(v1[1:], v2[1:])[0][1]))
1458+
0.916949
1459+
>>> s1 = pd.Series(v1)
1460+
>>> s2 = pd.Series(v2)
1461+
>>> s1.rolling(4).corr(s2)
1462+
0 NaN
1463+
1 NaN
1464+
2 NaN
1465+
3 0.333333
1466+
4 0.916949
1467+
dtype: float64
1468+
1469+
The example below shows a similar rolling calculation on a
1470+
DataFrame using the pairwise option.
1471+
1472+
>>> matrix = np.array([[51., 35.], [49., 30.], [47., 32.],
1473+
...                    [46., 31.], [50., 36.]])
1474+
>>> print(np.corrcoef(matrix[:-1,0], matrix[:-1,1]).round(7))
1475+
[[1. 0.6263001]
1476+
[0.6263001 1. ]]
1477+
>>> print(np.corrcoef(matrix[1:,0], matrix[1:,1]).round(7))
1478+
[[1. 0.5553681]
1479+
[0.5553681 1. ]]
1480+
>>> df = pd.DataFrame(matrix, columns=['X','Y'])
1481+
>>> df
1482+
X Y
1483+
0 51.0 35.0
1484+
1 49.0 30.0
1485+
2 47.0 32.0
1486+
3 46.0 31.0
1487+
4 50.0 36.0
1488+
>>> df.rolling(4).corr(pairwise=True)
1489+
X Y
1490+
0 X NaN NaN
1491+
Y NaN NaN
1492+
1 X NaN NaN
1493+
Y NaN NaN
1494+
2 X NaN NaN
1495+
Y NaN NaN
1496+
3 X 1.000000 0.626300
1497+
Y 0.626300 1.000000
1498+
4 X 1.000000 0.555368
1499+
Y 0.555368 1.000000
1500+
""")
14071501

14081502
def corr(self, other=None, pairwise=None, **kwargs):
14091503
if other is None:
@@ -1672,7 +1766,6 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
16721766
ddof=ddof, **kwargs)
16731767

16741768
@Substitution(name='rolling')
1675-
@Appender(_doc_template)
16761769
@Appender(_shared_docs['corr'])
16771770
def corr(self, other=None, pairwise=None, **kwargs):
16781771
return super(Rolling, self).corr(other=other, pairwise=pairwise,
@@ -1932,7 +2025,6 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
19322025
ddof=ddof, **kwargs)
19332026

19342027
@Substitution(name='expanding')
1935-
@Appender(_doc_template)
19362028
@Appender(_shared_docs['corr'])
19372029
def corr(self, other=None, pairwise=None, **kwargs):
19382030
return super(Expanding, self).corr(other=other, pairwise=pairwise,

0 commit comments

Comments
 (0)