Skip to content

Commit 2459687

Browse files
watercrossing authored and No-Stream committed
Fix groupby().count() for datetimelike columns (pandas-dev#18167)
1 parent 5943291 commit 2459687

File tree

3 files changed

+22
-3
lines changed

3 files changed

+22
-3
lines changed

doc/source/whatsnew/v0.21.1.txt

+1
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@ Bug Fixes
6060
- Bug in :class:`TimedeltaIndex` subtraction could incorrectly overflow when ``NaT`` is present (:issue:`17791`)
6161
- Bug in :class:`DatetimeIndex` subtracting datetimelike from DatetimeIndex could fail to overflow (:issue:`18020`)
6262
- Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` with all equal values has floating issue (:issue:`18044`)
63+
- Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`)
6364

6465
Conversion
6566
^^^^^^^^^^

pandas/core/groupby.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -4414,7 +4414,8 @@ def count(self):
44144414
ids, _, ngroups = self.grouper.group_info
44154415
mask = ids != -1
44164416

4417-
val = ((mask & ~isna(blk.get_values())) for blk in data.blocks)
4417+
val = ((mask & ~isna(np.atleast_2d(blk.get_values())))
4418+
for blk in data.blocks)
44184419
loc = (blk.mgr_locs for blk in data.blocks)
44194420

44204421
counter = partial(count_level_2d, labels=ids, max_bin=ngroups, axis=1)

pandas/tests/groupby/test_counting.py

+19 -2
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,11 @@
22
from __future__ import print_function
33

44
import numpy as np
5+
import pytest
56

6-
from pandas import (DataFrame, Series, MultiIndex)
7-
from pandas.util.testing import assert_series_equal
7+
from pandas import (DataFrame, Series, MultiIndex, Timestamp, Timedelta,
8+
Period)
9+
from pandas.util.testing import (assert_series_equal, assert_frame_equal)
810
from pandas.compat import (range, product as cart_product)
911

1012

@@ -195,3 +197,18 @@ def test_ngroup_respects_groupby_order(self):
195197
g.ngroup())
196198
assert_series_equal(Series(df['group_index'].values),
197199
g.cumcount())
200+
201+
@pytest.mark.parametrize('datetimelike', [
202+
[Timestamp('2016-05-%02d 20:09:25+00:00' % i) for i in range(1, 4)],
203+
[Timestamp('2016-05-%02d 20:09:25' % i) for i in range(1, 4)],
204+
[Timedelta(x, unit="h") for x in range(1, 4)],
205+
[Period(freq="2W", year=2017, month=x) for x in range(1, 4)]])
206+
def test_count_with_datetimelike(self, datetimelike):
207+
# test for #13393, where DataframeGroupBy.count() fails
208+
# when counting a datetimelike column.
209+
210+
df = DataFrame({'x': ['a', 'a', 'b'], 'y': datetimelike})
211+
res = df.groupby('x').count()
212+
expected = DataFrame({'y': [2, 1]}, index=['a', 'b'])
213+
expected.index.name = "x"
214+
assert_frame_equal(expected, res)

0 commit comments

Comments
 (0)