@@ -1391,19 +1391,113 @@ def _get_cov(X, Y):
1391
1391
_get_cov , pairwise = bool (pairwise ))
1392
1392
1393
1393
_shared_docs ['corr' ] = dedent ("""
1394
- %(name)s sample correlation
1394
+ Calculate %(name)s correlation.
1395
1395
1396
1396
Parameters
1397
1397
----------
1398
1398
other : Series, DataFrame, or ndarray, optional
1399
- if not supplied then will default to self and produce pairwise output
1399
+ If not supplied then will default to self.
1400
1400
pairwise : bool, default None
1401
- If False then only matching columns between self and other will be
1402
- used and the output will be a DataFrame.
1403
- If True then all pairwise combinations will be calculated and the
1404
- output will be a MultiIndex DataFrame in the case of DataFrame inputs.
1405
- In the case of missing elements, only complete pairwise observations
1406
- will be used.""" )
1401
+ Calculate pairwise combinations of columns within a
1402
+ DataFrame. If `other` is not specified, defaults to `True`,
1403
+ otherwise defaults to `False`.
1404
+ Not relevant for :class:`~pandas.Series`.
1405
+ **kwargs
1406
+ Under Review.
1407
+
1408
+ Returns
1409
+ -------
1410
+ Series or DataFrame
1411
+ Returned object type is determined by the caller of the
1412
+ %(name)s calculation.
1413
+
1414
+ See Also
1415
+ --------
1416
+ Series.%(name)s : Calling object with Series data
1417
+ DataFrame.%(name)s : Calling object with DataFrame data
1418
+ Series.corr : Equivalent method for Series
1419
+ DataFrame.corr : Equivalent method for DataFrame
1420
+ %(name)s.cov : Similar method to calculate covariance
1421
+ numpy.corrcoef : NumPy Pearson's correlation calculation
1422
+
1423
+ Notes
1424
+ -----
1425
+ This function uses Pearson's definition of correlation
1426
+ (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).
1427
+
1428
+ When `other` is not specified, the output will be self-correlation (e.g.
1429
+ all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
1430
+ set to `True`.
1431
+
1432
+ Function will return `NaN` for correlations of equal-valued sequences;
1433
+ this is the result of a 0/0 division error.
1434
+
1435
+ When `pairwise` is set to `False`, only matching columns between `self` and
1436
+ `other` will be used.
1437
+
1438
+ When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
1439
+ with the original index on the first level, and the `other` DataFrame
1440
+ columns on the second level.
1441
+
1442
+ In the case of missing elements, only complete pairwise observations
1443
+ will be used.
1444
+
1445
+ Examples
1446
+ --------
1447
+ The example below shows a rolling calculation with a window size of
1448
+ four matching the equivalent function call using `numpy.corrcoef`.
1449
+
1450
+ >>> v1 = [3, 3, 3, 5, 8]
1451
+ >>> v2 = [3, 4, 4, 4, 8]
1452
+ >>> fmt = "{0:.6f}" # limit the printed precision to 6 digits
1453
+ >>> # numpy returns a 2x2 array, the correlation coefficient
1454
+ >>> # is the number at entry [0][1]
1455
+ >>> print(fmt.format(np.corrcoef(v1[:-1], v2[:-1])[0][1]))
1456
+ 0.333333
1457
+ >>> print(fmt.format(np.corrcoef(v1[1:], v2[1:])[0][1]))
1458
+ 0.916949
1459
+ >>> s1 = pd.Series(v1)
1460
+ >>> s2 = pd.Series(v2)
1461
+ >>> s1.rolling(4).corr(s2)
1462
+ 0 NaN
1463
+ 1 NaN
1464
+ 2 NaN
1465
+ 3 0.333333
1466
+ 4 0.916949
1467
+ dtype: float64
1468
+
1469
+ The example below shows a similar rolling calculation on a
1470
+ DataFrame using the pairwise option.
1471
+
1472
+ >>> matrix = np.array([[51., 35.], [49., 30.], [47., 32.],\
1473
+ [46., 31.], [50., 36.]])
1474
+ >>> print(np.corrcoef(matrix[:-1,0], matrix[:-1,1]).round(7))
1475
+ [[1. 0.6263001]
1476
+ [0.6263001 1. ]]
1477
+ >>> print(np.corrcoef(matrix[1:,0], matrix[1:,1]).round(7))
1478
+ [[1. 0.5553681]
1479
+ [0.5553681 1. ]]
1480
+ >>> df = pd.DataFrame(matrix, columns=['X','Y'])
1481
+ >>> df
1482
+ X Y
1483
+ 0 51.0 35.0
1484
+ 1 49.0 30.0
1485
+ 2 47.0 32.0
1486
+ 3 46.0 31.0
1487
+ 4 50.0 36.0
1488
+ >>> df.rolling(4).corr(pairwise=True)
1489
+ X Y
1490
+ 0 X NaN NaN
1491
+ Y NaN NaN
1492
+ 1 X NaN NaN
1493
+ Y NaN NaN
1494
+ 2 X NaN NaN
1495
+ Y NaN NaN
1496
+ 3 X 1.000000 0.626300
1497
+ Y 0.626300 1.000000
1498
+ 4 X 1.000000 0.555368
1499
+ Y 0.555368 1.000000
1500
+ """ )
1407
1501
1408
1502
def corr (self , other = None , pairwise = None , ** kwargs ):
1409
1503
if other is None :
@@ -1672,7 +1766,6 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
1672
1766
ddof = ddof , ** kwargs )
1673
1767
1674
1768
@Substitution (name = 'rolling' )
1675
- @Appender (_doc_template )
1676
1769
@Appender (_shared_docs ['corr' ])
1677
1770
def corr (self , other = None , pairwise = None , ** kwargs ):
1678
1771
return super (Rolling , self ).corr (other = other , pairwise = pairwise ,
@@ -1932,7 +2025,6 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
1932
2025
ddof = ddof , ** kwargs )
1933
2026
1934
2027
@Substitution (name = 'expanding' )
1935
- @Appender (_doc_template )
1936
2028
@Appender (_shared_docs ['corr' ])
1937
2029
def corr (self , other = None , pairwise = None , ** kwargs ):
1938
2030
return super (Expanding , self ).corr (other = other , pairwise = pairwise ,
0 commit comments