Skip to content

Commit 6c1b358

Browse files
author
Joon Ro
committed
added describe and scoreatpercentile in frame.py
1 parent 9f6802d commit 6c1b358

File tree

1 file changed

+56
-0
lines changed

1 file changed

+56
-0
lines changed

pandas/core/frame.py

+56
Original file line numberDiff line numberDiff line change
@@ -1022,6 +1022,30 @@ def _union_index(self, other):
10221022

10231023
return union_index
10241024

1025+
def describe(self):
    """
    Build a table of summary statistics for the numeric columns

    Only the columns reported by ``_get_numeric_columns`` are summarized.
    Each of those columns becomes one row of the result, and each
    statistic becomes one column.

    Returns
    -------
    DataFrame
        Indexed by the numeric column names, with columns
        'count', 'mean', 'std', 'min', '10%', '50%', '90%', 'max'
    """
    numeric_cols = self._get_numeric_columns()
    numeric = self.reindex(columns=numeric_cols)

    # Keep every label next to the statistic it names so the two cannot
    # drift out of order.
    labelled_stats = [
        ('count', numeric.count()),
        ('mean', numeric.mean()),
        ('std', numeric.std()),
        ('min', numeric.min()),
        ('10%', numeric.scoreatpercentile(10)),
        ('50%', numeric.median()),
        ('90%', numeric.scoreatpercentile(90)),
        ('max', numeric.max()),
    ]
    labels = [label for label, _ in labelled_stats]

    # Start from a zero-filled frame of the final shape, then overwrite it
    # one statistic (column) at a time.
    summary = self._constructor(np.zeros((len(numeric_cols), len(labels))),
                                index=numeric_cols, columns=labels)
    for label, values in labelled_stats:
        summary[label] = values

    return summary
1048+
10251049
def dropEmptyRows(self, specificColumns=None):
10261050
"""
10271051
Return DataFrame with rows omitted containing ALL NaN values
@@ -2097,6 +2121,38 @@ def mean(self, axis=0):
20972121

20982122
return summed / count
20992123

2124+
def scoreatpercentile(self, per=50, axis=0):
    """
    Return Series of score at the given `per` percentile
    over requested axis.

    Entries for which ``notnull`` is False are dropped before the
    percentile is computed.

    Parameters
    ----------
    per : number, default 50
        Percentile to compute; passed through unchanged to
        scipy.stats.scoreatpercentile
    axis : {0, 1}
        0 computes one score per column, 1 computes one score per row

    Returns
    -------
    Series
        Indexed by the columns when axis is 0, by the index when axis is 1

    Raises
    ------
    ValueError
        If axis is neither 0 nor 1
    """
    # Function-scope import: scipy is only needed when this method runs.
    # Aliased so it does not shadow this method's own name while reading.
    from scipy.stats import scoreatpercentile as _scoreatpercentile

    def f(arr, per):
        # np.float_ was removed in NumPy 2.0; coerce via np.float64, which
        # leaves an array that is already float64 untouched.
        arr = np.asarray(arr, dtype=np.float64)
        return _scoreatpercentile(arr[notnull(arr)], per)

    if axis == 0:
        scoreatper = [f(self[col].values, per) for col in self.columns]
        return Series(scoreatper, index=self.columns)
    elif axis == 1:
        scoreatper = [f(self.xs(k).values, per) for k in self.index]
        return Series(scoreatper, index=self.index)
    else:
        # ValueError subclasses Exception, so existing handlers still match.
        raise ValueError('Must have 0<= axis <= 1')
2155+
21002156
def median(self, axis=0):
21012157
"""
21022158
Return array or Series of medians over requested axis.

0 commit comments

Comments
 (0)