Skip to content

Commit ccab72d

Browse files
haydjreback
authored and committed
ENH: add count method to groupby (GH5610)
1 parent 97c4a2e commit ccab72d

File tree

2 files changed

+33
-15
lines changed

2 files changed

+33
-15
lines changed

pandas/core/groupby.py

+21-15
Original file line numberDiff line numberDiff line change
@@ -673,9 +673,17 @@ def var(self, ddof=1):
673673
def size(self):
674674
"""
675675
Compute group sizes
676+
676677
"""
677678
return self.grouper.size()
678679

680+
def count(self):
681+
"""
682+
Number of non-null items in each group.
683+
684+
"""
685+
return self._python_agg_general(lambda x: notnull(x).sum())
686+
679687
sum = _groupby_function('sum', 'add', np.sum)
680688
prod = _groupby_function('prod', 'prod', np.prod)
681689
min = _groupby_function('min', 'min', np.min, numeric_only=False)
@@ -687,12 +695,10 @@ def size(self):
687695

688696
def ohlc(self):
689697
"""
690-
Compute sum of values, excluding missing values
691-
692-
For multiple groupings, the result index will be a MultiIndex
698+
Deprecated, use .resample(how="ohlc") instead.
693699
694700
"""
695-
return self._cython_agg_general('ohlc')
701+
raise AttributeError('ohlc is deprecated, use resample(how="ohlc").')
696702

697703
def nth(self, n, dropna=None):
698704
"""
@@ -939,6 +945,7 @@ def _cython_agg_general(self, how, numeric_only=True):
939945
result, names = self.grouper.aggregate(obj.values, how)
940946
except AssertionError as e:
941947
raise GroupByError(str(e))
948+
# infer old dtype
942949
output[name] = self._try_cast(result, obj)
943950

944951
if len(output) == 0:
@@ -947,6 +954,8 @@ def _cython_agg_general(self, how, numeric_only=True):
947954
return self._wrap_aggregated_output(output, names)
948955

949956
def _python_agg_general(self, func, *args, **kwargs):
957+
_dtype = kwargs.pop("_dtype", None)
958+
950959
func = _intercept_function(func)
951960
f = lambda x: func(x, *args, **kwargs)
952961

@@ -955,7 +964,14 @@ def _python_agg_general(self, func, *args, **kwargs):
955964
for name, obj in self._iterate_slices():
956965
try:
957966
result, counts = self.grouper.agg_series(obj, f)
958-
output[name] = self._try_cast(result, obj)
967+
968+
if _dtype is None: # infer old dtype
969+
output[name] = self._try_cast(result, obj)
970+
elif _dtype is False:
971+
output[name] = result
972+
else:
973+
output[name] = _possibly_downcast_to_dtype(result, _dtype)
974+
959975
except TypeError:
960976
continue
961977

@@ -2889,16 +2905,6 @@ def _apply_to_column_groupbys(self, func):
28892905
in self._iterate_column_groupbys()),
28902906
keys=self._selected_obj.columns, axis=1)
28912907

2892-
def ohlc(self):
2893-
"""
2894-
Compute sum of values, excluding missing values
2895-
2896-
For multiple groupings, the result index will be a MultiIndex
2897-
"""
2898-
return self._apply_to_column_groupbys(
2899-
lambda x: x._cython_agg_general('ohlc'))
2900-
2901-
29022908
from pandas.tools.plotting import boxplot_frame_groupby
29032909
DataFrameGroupBy.boxplot = boxplot_frame_groupby
29042910

pandas/tests/test_groupby.py

+12
Original file line numberDiff line numberDiff line change
@@ -1970,6 +1970,18 @@ def test_size(self):
19701970
for key, group in grouped:
19711971
self.assertEquals(result[key], len(group))
19721972

1973+
def test_count(self):
1974+
df = pd.DataFrame([[1, 2], [1, nan], [3, nan]], columns=['A', 'B'])
1975+
count_as = df.groupby('A').count()
1976+
count_not_as = df.groupby('A', as_index=False).count()
1977+
1978+
res = pd.DataFrame([[1, 1], [3, 0]], columns=['A', 'B'])
1979+
assert_frame_equal(count_not_as, res)
1980+
assert_frame_equal(count_as, res.set_index('A'))
1981+
1982+
count_B = df.groupby('A')['B'].count()
1983+
assert_series_equal(count_B, res['B'])
1984+
19731985
def test_grouping_ndarray(self):
19741986
grouped = self.df.groupby(self.df['A'].values)
19751987

0 commit comments

Comments
 (0)