Skip to content

Commit 5a4dafb

Browse files
author
David Cottrell
committed
Add test and patch to fix bug GH8868 (groupby sort categorical).
1 parent fa2b684 commit 5a4dafb

File tree

3 files changed

+28
-0
lines changed

3 files changed

+28
-0
lines changed

doc/source/whatsnew/v0.16.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -307,3 +307,4 @@ Bug Fixes
307307
- Bug in ``read_csv`` with buffer overflows with certain malformed input files (:issue:`9205`)
308308
- Bug in groupby MultiIndex with missing pair (:issue:`9049`, :issue:`9344`)
309309
- Fixed bug in ``Series.groupby`` where grouping on ``MultiIndex`` levels would ignore the sort argument (:issue:`9444`)
310+
- Bug in ``DataFrame.groupby`` where ``sort=False`` is ignored in case of Categorical columns (:issue:`8868`)

pandas/core/groupby.py

+3
Original file line number | Diff line number | Diff line change
@@ -1925,6 +1925,9 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
19251925

19261926
# a passed Categorical
19271927
elif isinstance(self.grouper, Categorical):
1928+
# GH 8868: honor sort=False by reordering the categories to match self.grouper.unique()
1929+
if not self.sort:
1930+
self.grouper = self.grouper.reorder_categories(self.grouper.unique())
19281931
self._labels = self.grouper.codes
19291932
self._group_index = self.grouper.categories
19301933
if self.name is None:

pandas/tests/test_groupby.py

+24
Original file line number | Diff line number | Diff line change
@@ -3265,6 +3265,30 @@ def test_no_dummy_key_names(self):
32653265
self.df['B'].values]).sum()
32663266
self.assertEqual(result.index.names, (None, None))
32673267

3268+
def test_groupby_sort_categorical(self):
3269+
# GH 8868: DataFrame.groupby ignored sort=False for Categorical columns
3270+
df = DataFrame([['(7.5, 10]', 10, 10],
3271+
['(7.5, 10]', 8, 20],
3272+
['(2.5, 5]', 5, 30],
3273+
['(5, 7.5]', 6, 40],
3274+
['(2.5, 5]', 4, 50],
3275+
['(0, 2.5]', 1, 60],
3276+
['(5, 7.5]', 7, 70]], columns=['range', 'foo', 'bar'])
3277+
df['range'] = Categorical(df['range'])
3278+
index = Index(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', '(7.5, 10]'], dtype='object')
3279+
index.name = 'range'
3280+
result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar'])
3281+
result_sort.index = index
3282+
index = Index(['(7.5, 10]', '(2.5, 5]', '(5, 7.5]', '(0, 2.5]'], dtype='object')
3283+
index.name = 'range'
3284+
result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], index=index, columns=['foo', 'bar'])
3285+
result_nosort.index = index
3286+
3287+
col = 'range'
3288+
assert_frame_equal(result_sort, df.groupby(col, sort=True).first())
3289+
assert_frame_equal(result_nosort, df.groupby(col, sort=False).first())
3290+
3291+
32683292
def test_groupby_sort_multiindex_series(self):
32693293
# series multiindex groupby sort argument was not being passed through _compress_group_index
32703294
# GH 9444

0 commit comments

Comments
 (0)