From 91d7b42e10ab5e76705f356b3ffa3853623714be Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Sun, 16 Oct 2011 18:46:16 +0100 Subject: [PATCH 1/4] Create simple summaries of Series with object data. Addresses gh-210 --- pandas/core/series.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index bc921dfd31141..3a808355ea746 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5,6 +5,7 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0703,W0622,W0613,W0201 +import collections import csv import itertools import operator @@ -873,12 +874,20 @@ def describe(self): ------- desc : Series """ - names = ['count', 'mean', 'std', 'min', - '25%', '50%', '75%', 'max'] - - data = [self.count(), self.mean(), self.std(), self.min(), - self.quantile(.25), self.median(), self.quantile(.75), - self.max()] + if self.dtype == object: + names = ['count', 'unique', 'top', 'freq'] + + objcounts = collections.Counter(self) + top, freq = objcounts.most_common(1)[0] + data = [self.count(), len(objcounts), top, freq] + + else: + names = ['count', 'mean', 'std', 'min', + '25%', '50%', '75%', 'max'] + + data = [self.count(), self.mean(), self.std(), self.min(), + self.quantile(.25), self.median(), self.quantile(.75), + self.max()] return Series(data, index=names) From 908534d6920ba4cd3420fce38ff77abe3abea393 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Sun, 16 Oct 2011 18:51:28 +0100 Subject: [PATCH 2/4] Fix bugs with .max() and .min() for integer columns in DataFrame. --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 212bd6331fb04..f1be9e5fd4eee 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2497,7 +2497,7 @@ def min(self, axis=0, skipna=True): min : Series """ values = self.values.copy() - if skipna: + if skipna and not issubclass(values.dtype.type, np.int_): np.putmask(values, -np.isfinite(values), np.inf) return Series(values.min(axis), index=self._get_agg_axis(axis)) @@ -2518,7 +2518,7 @@ def max(self, axis=0, skipna=True): max : Series """ values = self.values.copy() - if skipna: + if skipna and not issubclass(values.dtype.type, np.int_): np.putmask(values, -np.isfinite(values), -np.inf) return Series(values.max(axis), index=self._get_agg_axis(axis)) From 73ea108ed572790d832540ec0f40850730a0b326 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Sun, 16 Oct 2011 18:57:11 +0100 Subject: [PATCH 3/4] Add tests for .min and .max on dataframes with integer columns. --- pandas/tests/test_frame.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 92ba3c343d279..6edd4c15a1e84 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2721,9 +2721,11 @@ def wrapper(x): def test_min(self): self._check_stat_op('min', np.min) + self._check_stat_op('min', np.min, frame=self.intframe) def test_max(self): self._check_stat_op('max', np.max) + self._check_stat_op('max', np.max, frame=self.intframe) def test_mad(self): f = lambda x: np.abs(x - x.mean()).mean() From 8cdfae0dd54be3fd22f59ebb528bb6f029b1269f Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Sun, 16 Oct 2011 18:59:04 +0100 Subject: [PATCH 4/4] Test .describe() for object series. --- pandas/tests/test_series.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 817721dbcfeb8..9a4ce05843cda 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -550,6 +550,7 @@ def test_quantile(self): def test_describe(self): _ = self.series.describe() _ = self.ts.describe() + _ = self.objSeries.describe() def test_append(self): appendedSeries = self.series.append(self.ts)