@@ -9764,6 +9764,7 @@ def corr(
9764
9764
self ,
9765
9765
method : str | Callable [[np .ndarray , np .ndarray ], float ] = "pearson" ,
9766
9766
min_periods : int = 1 ,
9767
+ numeric_only : bool = True ,
9767
9768
) -> DataFrame :
9768
9769
"""
9769
9770
Compute pairwise correlation of columns, excluding NA/null values.
@@ -9784,6 +9785,10 @@ def corr(
9784
9785
Minimum number of observations required per pair of columns
9785
9786
to have a valid result. Currently only available for Pearson
9786
9787
and Spearman correlation.
9788
+ numeric_only : bool, default True
9789
+ Include only `float`, `int` or `boolean` data.
9790
+
9791
+ .. versionadded:: 1.5.0
9787
9792
9788
9793
Returns
9789
9794
-------
@@ -9823,10 +9828,13 @@ def corr(
9823
9828
dogs 1.0 NaN
9824
9829
cats NaN 1.0
9825
9830
""" # noqa:E501
9826
- numeric_df = self ._get_numeric_data ()
9827
- cols = numeric_df .columns
9831
+ if numeric_only :
9832
+ data = self ._get_numeric_data ()
9833
+ else :
9834
+ data = self
9835
+ cols = data .columns
9828
9836
idx = cols .copy ()
9829
- mat = numeric_df .to_numpy (dtype = float , na_value = np .nan , copy = False )
9837
+ mat = data .to_numpy (dtype = float , na_value = np .nan , copy = False )
9830
9838
9831
9839
if method == "pearson" :
9832
9840
correl = libalgos .nancorr (mat , minp = min_periods )
@@ -9865,7 +9873,12 @@ def corr(
9865
9873
9866
9874
return self ._constructor (correl , index = idx , columns = cols )
9867
9875
9868
- def cov (self , min_periods : int | None = None , ddof : int | None = 1 ) -> DataFrame :
9876
+ def cov (
9877
+ self ,
9878
+ min_periods : int | None = None ,
9879
+ ddof : int | None = 1 ,
9880
+ numeric_only : bool = True ,
9881
+ ) -> DataFrame :
9869
9882
"""
9870
9883
Compute pairwise covariance of columns, excluding NA/null values.
9871
9884
@@ -9896,6 +9909,11 @@ def cov(self, min_periods: int | None = None, ddof: int | None = 1) -> DataFrame
9896
9909
9897
9910
.. versionadded:: 1.1.0
9898
9911
9912
+ numeric_only : bool, default True
9913
+ Include only `float`, `int` or `boolean` data.
9914
+
9915
+ .. versionadded:: 1.5.0
9916
+
9899
9917
Returns
9900
9918
-------
9901
9919
DataFrame
@@ -9964,10 +9982,13 @@ def cov(self, min_periods: int | None = None, ddof: int | None = 1) -> DataFrame
9964
9982
b NaN 1.248003 0.191417
9965
9983
c -0.150812 0.191417 0.895202
9966
9984
"""
9967
- numeric_df = self ._get_numeric_data ()
9968
- cols = numeric_df .columns
9985
+ if numeric_only :
9986
+ data = self ._get_numeric_data ()
9987
+ else :
9988
+ data = self
9989
+ cols = data .columns
9969
9990
idx = cols .copy ()
9970
- mat = numeric_df .to_numpy (dtype = float , na_value = np .nan , copy = False )
9991
+ mat = data .to_numpy (dtype = float , na_value = np .nan , copy = False )
9971
9992
9972
9993
if notna (mat ).all ():
9973
9994
if min_periods is not None and min_periods > len (mat ):
@@ -9981,7 +10002,14 @@ def cov(self, min_periods: int | None = None, ddof: int | None = 1) -> DataFrame
9981
10002
9982
10003
return self ._constructor (base_cov , index = idx , columns = cols )
9983
10004
9984
- def corrwith (self , other , axis : Axis = 0 , drop = False , method = "pearson" ) -> Series :
10005
+ def corrwith (
10006
+ self ,
10007
+ other ,
10008
+ axis : Axis = 0 ,
10009
+ drop = False ,
10010
+ method = "pearson" ,
10011
+ numeric_only : bool = True ,
10012
+ ) -> Series :
9985
10013
"""
9986
10014
Compute pairwise correlation.
9987
10015
@@ -10008,6 +10036,11 @@ def corrwith(self, other, axis: Axis = 0, drop=False, method="pearson") -> Serie
10008
10036
* callable: callable with input two 1d ndarrays
10009
10037
and returning a float.
10010
10038
10039
+ numeric_only : bool, default True
10040
+ Include only `float`, `int` or `boolean` data.
10041
+
10042
+ .. versionadded:: 1.5.0
10043
+
10011
10044
Returns
10012
10045
-------
10013
10046
Series
@@ -10039,7 +10072,10 @@ def corrwith(self, other, axis: Axis = 0, drop=False, method="pearson") -> Serie
10039
10072
dtype: float64
10040
10073
""" # noqa:E501
10041
10074
axis = self ._get_axis_number (axis )
10042
- this = self ._get_numeric_data ()
10075
+ if numeric_only :
10076
+ this = self ._get_numeric_data ()
10077
+ else :
10078
+ this = self
10043
10079
10044
10080
# GH46174: when other is a Series object and axis=0, we achieve a speedup over
10045
10081
# passing .corr() to .apply() by taking the columns as ndarrays and iterating
@@ -10052,19 +10088,23 @@ def corrwith(self, other, axis: Axis = 0, drop=False, method="pearson") -> Serie
10052
10088
if isinstance (other , Series ):
10053
10089
if axis == 0 and method in ["pearson" , "spearman" ]:
10054
10090
corrs = {}
10055
- numeric_cols = self .select_dtypes (include = np .number ).columns
10056
- ndf = self [numeric_cols ].values .transpose ()
10091
+ if numeric_only :
10092
+ cols = self .select_dtypes (include = np .number ).columns
10093
+ ndf = self [cols ].values .transpose ()
10094
+ else :
10095
+ cols = self .columns
10096
+ ndf = self .values .transpose ()
10057
10097
k = other .values
10058
10098
if method == "pearson" :
10059
10099
for i , r in enumerate (ndf ):
10060
10100
nonnull_mask = ~ np .isnan (r ) & ~ np .isnan (k )
10061
- corrs [numeric_cols [i ]] = np .corrcoef (
10062
- r [ nonnull_mask ], k [ nonnull_mask ]
10063
- )[ 0 , 1 ]
10101
+ corrs [cols [i ]] = np .corrcoef (r [ nonnull_mask ], k [ nonnull_mask ])[
10102
+ 0 , 1
10103
+ ]
10064
10104
else :
10065
10105
for i , r in enumerate (ndf ):
10066
10106
nonnull_mask = ~ np .isnan (r ) & ~ np .isnan (k )
10067
- corrs [numeric_cols [i ]] = np .corrcoef (
10107
+ corrs [cols [i ]] = np .corrcoef (
10068
10108
r [nonnull_mask ].argsort ().argsort (),
10069
10109
k [nonnull_mask ].argsort ().argsort (),
10070
10110
)[0 , 1 ]
0 commit comments