@@ -1027,6 +1027,30 @@ def _union_index(self, other):
1027
1027
1028
1028
return union_index
1029
1029
1030
def describe(self):
    """
    Summarize the numeric columns with a fixed set of descriptive
    statistics, excluding NaN values

    The statistics are, in order: count, mean, std, min, the 10th
    percentile, the median (labelled '50%'), the 90th percentile and max.
    Only the columns reported by ``_get_numeric_columns()`` take part.

    Returns
    -------
    DataFrame
        Indexed by the selected columns, with one column per statistic
    """
    numeric_cols = self._get_numeric_columns()
    numeric = self.reindex(columns=numeric_cols)

    # Each label is paired with the per-column result it names; the order
    # of this list fixes the column order of the returned frame.
    labelled_stats = [
        ('count', numeric.count()),
        ('mean', numeric.mean()),
        ('std', numeric.std()),
        ('min', numeric.min()),
        ('10%', numeric.scoreatpercentile(10)),
        ('50%', numeric.median()),
        ('90%', numeric.scoreatpercentile(90)),
        ('max', numeric.max()),
    ]
    stat_names = [name for name, _ in labelled_stats]

    # Start from a zero-filled frame of the final shape, then overwrite it
    # one statistic column at a time.
    blank = np.zeros((len(numeric_cols), len(stat_names)))
    summary = self._constructor(blank, index=numeric_cols,
                                columns=stat_names)
    for name, values in labelled_stats:
        summary[name] = values

    return summary
1053
+
1030
1054
def dropEmptyRows (self , specificColumns = None ):
1031
1055
"""
1032
1056
Return DataFrame with rows omitted containing ALL NaN values
@@ -2132,6 +2156,38 @@ def mean(self, axis=0):
2132
2156
2133
2157
return summed / count
2134
2158
2159
def scoreatpercentile(self, per=50, axis=0):
    """
    Return Series of the score at the given `per` percentile over the
    requested axis.

    Entries for which ``notnull`` is false are dropped from each column
    (or row) before the percentile is computed.

    Parameters
    ----------
    per : percentile, default 50
        Passed through to ``scipy.stats.scoreatpercentile``
    axis : {0, 1}
        0 computes one value per column (result indexed by the columns),
        1 computes one value per row (result indexed by the index)

    Returns
    -------
    Series

    Raises
    ------
    Exception
        If `axis` is neither 0 nor 1
    """
    from scipy.stats import scoreatpercentile

    def f(arr, per):
        # Compare against np.float64 directly: the np.float_ alias was
        # removed in NumPy 2.0 and would raise AttributeError here.
        if arr.dtype != np.float64:
            arr = arr.astype(float)
        return scoreatpercentile(arr[notnull(arr)], per)

    if axis == 0:
        scoreatper = [f(self[col].values, per) for col in self.columns]
        return Series(scoreatper, index=self.columns)
    elif axis == 1:
        scoreatper = [f(self.xs(k).values, per) for k in self.index]
        return Series(scoreatper, index=self.index)
    else:
        raise Exception('Must have 0<= axis <= 1')
2190
+
2135
2191
def median (self , axis = 0 ):
2136
2192
"""
2137
2193
Return array or Series of medians over requested axis.
0 commit comments