Skip to content

Commit 9ef6253

Browse files
committed
Merge branch 'master' of https://github.com/joonpy/pandas into joonpy-master
2 parents 4ed2e42 + 6c1b358 commit 9ef6253

File tree

1 file changed

+56
-0
lines changed

1 file changed

+56
-0
lines changed

pandas/core/frame.py

+56
Original file line numberDiff line numberDiff line change
@@ -1027,6 +1027,30 @@ def _union_index(self, other):
10271027

10281028
return union_index
10291029

1030+
def describe(self):
    """
    Build a table of summary statistics, one row per column reported by
    `_get_numeric_columns`.

    The statistics, in output column order, are: count, mean, std, min,
    the 10th percentile, the median (labelled '50%'), the 90th percentile
    and max. Each one is computed by the corresponding reduction on the
    frame restricted to those columns.

    Returns
    -------
    DataFrame
        Indexed by the selected column names, with one column per
        statistic
    """
    numeric_cols = self._get_numeric_columns()
    numeric = self.reindex(columns=numeric_cols)

    # (label, per-column values) pairs, listed in output column order
    summary = [
        ('count', numeric.count()),
        ('mean', numeric.mean()),
        ('std', numeric.std()),
        ('min', numeric.min()),
        ('10%', numeric.scoreatpercentile(10)),
        ('50%', numeric.median()),
        ('90%', numeric.scoreatpercentile(90)),
        ('max', numeric.max()),
    ]
    labels = [label for label, _ in summary]

    # Start from a zero-filled frame of the final shape, then overwrite
    # each statistic column with its computed values.
    result = self._constructor(np.zeros((len(numeric_cols), len(labels))),
                               index=numeric_cols, columns=labels)
    for label, values in summary:
        result[label] = values

    return result
1053+
10301054
def dropEmptyRows(self, specificColumns=None):
10311055
"""
10321056
Return DataFrame with rows omitted containing ALL NaN values
@@ -2132,6 +2156,38 @@ def mean(self, axis=0):
21322156

21332157
return summed / count
21342158

2159+
def scoreatpercentile(self, per=50, axis=0):
    """
    Return a Series holding the score at the `per` percentile along the
    requested axis. Null values are dropped before the score is computed.

    Parameters
    ----------
    per : number, default 50
        Percentile to compute; passed through unchanged to
        scipy.stats.scoreatpercentile
    axis : {0, 1}, default 0
        0 computes one score per column, 1 computes one score per row

    Returns
    -------
    Series
        Indexed by the columns (axis=0) or by the index (axis=1). A
        column or row with no non-null values yields NaN.

    Raises
    ------
    Exception
        If `axis` is neither 0 nor 1
    """
    # Imported inside the method so scipy is only needed when it is called.
    from scipy.stats import scoreatpercentile as _score

    def _score_of(values):
        # Compare against np.float64: the np.float_ alias no longer exists
        # in NumPy 2.0 and would raise AttributeError on every call.
        if values.dtype != np.float64:
            values = values.astype(float)
        valid = values[notnull(values)]
        if len(valid) == 0:
            # Nothing to rank: report a missing value instead of handing
            # scipy an empty array.
            return np.nan
        return _score(valid, per)

    if axis == 0:
        labels = self.columns
        scores = [_score_of(self[col].values) for col in labels]
    elif axis == 1:
        labels = self.index
        scores = [_score_of(self.xs(k).values) for k in labels]
    else:
        raise Exception('Must have 0<= axis <= 1')

    return Series(scores, index=labels)
2190+
21352191
def median(self, axis=0):
21362192
"""
21372193
Return array or Series of medians over requested axis.

0 commit comments

Comments
 (0)