diff --git a/doc/source/v0.11.0.txt b/doc/source/v0.11.0.txt index 60ec7de5c4d8e..ce2cfed94a5cf 100644 --- a/doc/source/v0.11.0.txt +++ b/doc/source/v0.11.0.txt @@ -316,6 +316,9 @@ Bug Fixes reporting (GH2807_) - Fix pretty-printing of infinite data structures (closes GH2978_) - str.contains ignored na argument (GH2806_) + - Raise AssertionError instead of segfaulting when grouping with a + categorical grouper of mismatched length (GH3011_) + See the `full release notes `__ or issue tracker @@ -331,3 +334,4 @@ on GitHub for a complete list. .. _GH2806: https://github.com/pydata/pandas/issues/2806 .. _GH2807: https://github.com/pydata/pandas/issues/2807 .. _GH2918: https://github.com/pydata/pandas/issues/2918 +.. _GH3011: https://github.com/pydata/pandas/issues/3011 diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 3f12f773db96a..9e5e9f6404aa4 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1310,6 +1310,11 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True): exclusions.append(gpr) name = gpr gpr = obj[gpr] + + if (isinstance(gpr,Categorical) and len(gpr) != len(obj)): + errmsg = "Categorical grouper must have len(grouper) == len(data)" + raise AssertionError(errmsg) + ping = Grouping(group_axis, gpr, name=name, level=level, sort=sort) groupings.append(ping) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 4b1770dd4f5df..d276e2e905623 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2237,6 +2237,15 @@ def test_groupby_first_datetime64(self): got_dt = result.dtype self.assert_(issubclass(got_dt.type, np.datetime64)) + def test_groupby_categorical_unequal_len(self): + import pandas as pd + #GH3011 + series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4]) + bins = pd.cut(series.dropna(), 4) + + # len(bins) != len(series) here + self.assertRaises(AssertionError,lambda : series.groupby(bins).mean()) + def assert_fp_equal(a, b): assert((np.abs(a - b) < 1e-12).all())