From 70ec9211cc089f7102a7cefcaa1722290dc388bc Mon Sep 17 00:00:00 2001 From: behzad nouri Date: Wed, 3 Sep 2014 19:41:26 -0400 Subject: [PATCH] BUG: GroupBy.count() with float32 data type does not exclude nan --- doc/source/v0.15.0.txt | 1 + pandas/core/groupby.py | 12 +++++------- pandas/tests/test_groupby.py | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 71f39d9621bee..bfd484b363dd2 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -669,3 +669,4 @@ Bug Fixes was a tuple (:issue:`8121`). - Bug with kde plot and NaNs (:issue:`8182`) +- Bug in ``GroupBy.count`` with float32 data type where nan values were not excluded (:issue:`8169`). diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 320680726deaf..41ff6a6964841 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -147,10 +147,7 @@ def _last(x): def _count_compat(x, axis=0): - try: - return x.size - except: - return x.count() + return x.count() # .size != .count(); count excludes nan class Grouper(object): """ @@ -1527,14 +1524,15 @@ def aggregate(self, values, how, axis=0): result = self._aggregate(result, counts, values, how, is_numeric) - if self._filter_empty_groups: + if self._filter_empty_groups and not counts.all(): if result.ndim == 2: try: result = lib.row_bool_subset( result, (counts > 0).view(np.uint8)) except ValueError: result = lib.row_bool_subset_object( - result, (counts > 0).view(np.uint8)) + com._ensure_object(result), + (counts > 0).view(np.uint8)) else: result = result[counts > 0] @@ -2477,7 +2475,7 @@ def _cython_agg_blocks(self, how, numeric_only=True): values = block._try_operate(block.values) if block.is_numeric: - values = com.ensure_float(values) + values = _algos.ensure_float64(values) result, _ = self.grouper.aggregate(values, how, axis=agg_axis) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 3cfdd8ec92af3..84aaed8194013 
100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2214,6 +2214,20 @@ def test_count_object(self): expected = pd.Series([1, 3], index=[2, 3], name='a') tm.assert_series_equal(result, expected) + def test_count_cross_type(self): # GH8169 + vals = np.hstack((np.random.randint(0,5,(100,2)), + np.random.randint(0,2,(100,2)))) + + df = pd.DataFrame(vals, columns=['a', 'b', 'c', 'd']) + df[df==2] = np.nan + expected = df.groupby(['c', 'd']).count() + + for t in ['float32', 'object']: + df['a'] = df['a'].astype(t) + df['b'] = df['b'].astype(t) + result = df.groupby(['c', 'd']).count() + tm.assert_frame_equal(result, expected) + def test_non_cython_api(self): # GH5610