@@ -1022,6 +1022,30 @@ def _union_index(self, other):
1022
1022
1023
1023
return union_index
1024
1024
1025
def describe(self):
    """
    Generate various summary statistics of columns, excluding NaN values

    Only the columns reported by `_get_numeric_columns` are summarized.
    The result has one row per such column and one column per statistic,
    in this order: 'count', 'mean', 'std', 'min', '10%', '50%', '90%',
    'max'.

    Returns
    -------
    DataFrame
    """
    numeric_cols = self._get_numeric_columns()
    numeric = self.reindex(columns=numeric_cols)

    # (label, statistic) pairs, listed in output column order.
    stats = [
        ('count', numeric.count()),
        ('mean', numeric.mean()),
        ('std', numeric.std()),
        ('min', numeric.min()),
        ('10%', numeric.scoreatpercentile(10)),
        ('50%', numeric.median()),
        ('90%', numeric.scoreatpercentile(90)),
        ('max', numeric.max()),
    ]
    labels = [label for label, _ in stats]

    # Start from a zero-filled frame of the final shape, then overwrite
    # each statistic column with its computed values.
    summary = self._constructor(np.zeros((len(numeric_cols), len(labels))),
                                index=numeric_cols, columns=labels)
    for label, values in stats:
        summary[label] = values

    return summary
1048
+
1025
1049
def dropEmptyRows (self , specificColumns = None ):
1026
1050
"""
1027
1051
Return DataFrame with rows omitted containing ALL NaN values
@@ -2097,6 +2121,38 @@ def mean(self, axis=0):
2097
2121
2098
2122
return summed / count
2099
2123
2124
def scoreatpercentile(self, per=50, axis=0):
    """
    Return Series of score at the given `per` percentile over requested
    axis, excluding NaN values.

    Parameters
    ----------
    per : number, default 50
        Percentile to compute; passed through unchanged to
        scipy.stats.scoreatpercentile.
    axis : {0, 1}, default 0
        0 computes one score per column (result indexed by the columns),
        1 computes one score per row (result indexed by the index).

    Returns
    -------
    Series
        One score per column / row. NaN is returned for any column / row
        that has no non-null values.

    Raises
    ------
    Exception
        If `axis` is neither 0 nor 1.
    """
    # Local import, aliased so it is not confused with this method's name.
    from scipy.stats import scoreatpercentile as _score

    def f(arr, per):
        # np.asarray only copies when a cast is actually needed, and does
        # not rely on the np.float_ alias (removed in NumPy 2.0).
        values = np.asarray(arr, dtype=float)
        valid = values[notnull(values)]
        if valid.size == 0:
            # Nothing to rank: report missing rather than depend on how
            # the installed scipy treats an empty array.
            return np.nan
        return _score(valid, per)

    if axis == 0:
        labels = self.columns
        scoreatper = [f(self[col].values, per) for col in labels]
    elif axis == 1:
        labels = self.index
        scoreatper = [f(self.xs(k).values, per) for k in labels]
    else:
        raise Exception('Must have 0<= axis <= 1')

    return Series(scoreatper, index=labels)
2155
+
2100
2156
def median (self , axis = 0 ):
2101
2157
"""
2102
2158
Return array or Series of medians over requested axis.
0 commit comments