Skip to content

Commit 3ecb760

Browse files
committed
Merge pull request pandas-dev#8171 from behzadnouri/count-f32
BUG: GroupBy.count() with float32 data type does not exclude nan
2 parents e226bac + 70ec921 commit 3ecb760

File tree

3 files changed

+20
-7
lines changed

3 files changed

+20
-7
lines changed

doc/source/v0.15.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -669,3 +669,4 @@ Bug Fixes
669669
was a tuple (:issue:`8121`).
670670

671671
- Bug with kde plot and NaNs (:issue:`8182`)
672+
- Bug in ``GroupBy.count`` with float32 data type where NaN values were not excluded (:issue:`8169`).

pandas/core/groupby.py

+5-7
Original file line numberDiff line numberDiff line change
@@ -147,10 +147,7 @@ def _last(x):
147147

148148

149149
def _count_compat(x, axis=0):
150-
try:
151-
return x.size
152-
except:
153-
return x.count()
150+
return x.count() # .size != .count(); count excludes nan
154151

155152
class Grouper(object):
156153
"""
@@ -1527,14 +1524,15 @@ def aggregate(self, values, how, axis=0):
15271524

15281525
result = self._aggregate(result, counts, values, how, is_numeric)
15291526

1530-
if self._filter_empty_groups:
1527+
if self._filter_empty_groups and not counts.all():
15311528
if result.ndim == 2:
15321529
try:
15331530
result = lib.row_bool_subset(
15341531
result, (counts > 0).view(np.uint8))
15351532
except ValueError:
15361533
result = lib.row_bool_subset_object(
1537-
result, (counts > 0).view(np.uint8))
1534+
com._ensure_object(result),
1535+
(counts > 0).view(np.uint8))
15381536
else:
15391537
result = result[counts > 0]
15401538

@@ -2477,7 +2475,7 @@ def _cython_agg_blocks(self, how, numeric_only=True):
24772475
values = block._try_operate(block.values)
24782476

24792477
if block.is_numeric:
2480-
values = com.ensure_float(values)
2478+
values = _algos.ensure_float64(values)
24812479

24822480
result, _ = self.grouper.aggregate(values, how, axis=agg_axis)
24832481

pandas/tests/test_groupby.py

+14
Original file line numberDiff line numberDiff line change
@@ -2214,6 +2214,20 @@ def test_count_object(self):
22142214
expected = pd.Series([1, 3], index=[2, 3], name='a')
22152215
tm.assert_series_equal(result, expected)
22162216

2217+
def test_count_cross_type(self): # GH8169
2218+
vals = np.hstack((np.random.randint(0,5,(100,2)),
2219+
np.random.randint(0,2,(100,2))))
2220+
2221+
df = pd.DataFrame(vals, columns=['a', 'b', 'c', 'd'])
2222+
df[df==2] = np.nan
2223+
expected = df.groupby(['c', 'd']).count()
2224+
2225+
for t in ['float32', 'object']:
2226+
df['a'] = df['a'].astype(t)
2227+
df['b'] = df['b'].astype(t)
2228+
result = df.groupby(['c', 'd']).count()
2229+
tm.assert_frame_equal(result, expected)
2230+
22172231
def test_non_cython_api(self):
22182232

22192233
# GH5610

0 commit comments

Comments
 (0)