Skip to content

Commit 6e7b37b

Browse files
author
y-p
committed
Merge pull request #3017 from y-p/GH3011
segmentation fault on groupby with categorical grouper of mismatched len
2 parents 32ad737 + 69b6d60 commit 6e7b37b

File tree

3 files changed

+18
-0
lines changed

3 files changed

+18
-0
lines changed

doc/source/v0.11.0.txt

+4
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,9 @@ Bug Fixes
321321
- Fix pretty-printing of infinite data structures (closes GH2978_)
322322
- Fixed exception when plotting timeseries bearing a timezone (closes GH2877_)
323323
- str.contains ignored na argument (GH2806_)
324+
- Raise an exception instead of segfaulting when grouping with a categorical grouper
325+
of mismatched length (GH3011_)
326+
324327

325328
See the `full release notes
326329
<https://github.com/pydata/pandas/blob/master/RELEASE.rst>`__ or issue tracker
@@ -337,3 +340,4 @@ on GitHub for a complete list.
337340
.. _GH2806: https://github.com/pydata/pandas/issues/2806
338341
.. _GH2807: https://github.com/pydata/pandas/issues/2807
339342
.. _GH2918: https://github.com/pydata/pandas/issues/2918
343+
.. _GH3011: https://github.com/pydata/pandas/issues/3011

pandas/core/groupby.py

+5
Original file line numberDiff line numberDiff line change
@@ -1310,6 +1310,11 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
13101310
exclusions.append(gpr)
13111311
name = gpr
13121312
gpr = obj[gpr]
1313+
1314+
if (isinstance(gpr,Categorical) and len(gpr) != len(obj)):
1315+
errmsg = "Categorical grouper must have len(grouper) == len(data)"
1316+
raise AssertionError(errmsg)
1317+
13131318
ping = Grouping(group_axis, gpr, name=name, level=level, sort=sort)
13141319
groupings.append(ping)
13151320

pandas/tests/test_groupby.py

+9
Original file line numberDiff line numberDiff line change
@@ -2237,6 +2237,15 @@ def test_groupby_first_datetime64(self):
22372237
got_dt = result.dtype
22382238
self.assert_(issubclass(got_dt.type, np.datetime64))
22392239

2240+
def test_groupby_categorical_unequal_len(self):
2241+
import pandas as pd
2242+
#GH3011
2243+
series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4])
2244+
bins = pd.cut(series.dropna(), 4)
2245+
2246+
# len(bins) != len(series) here
2247+
self.assertRaises(AssertionError,lambda : series.groupby(bins).mean())
2248+
22402249
def assert_fp_equal(a, b):
22412250
assert((np.abs(a - b) < 1e-12).all())
22422251

0 commit comments

Comments
 (0)