Skip to content

Commit 37584bf

Browse files
committed
use correct cast
1 parent 9fa3c50 commit 37584bf

File tree

4 files changed

+50
-26
lines changed

4 files changed

+50
-26
lines changed

pandas/core/arrays/categorical.py

+9 -5
Original file line number | Diff line number | Diff line change
@@ -683,11 +683,15 @@ def _codes_for_groupby(self, sort, observed):
683683
take_codes = np.sort(take_codes)
684684

685685
# we recode according to the uniques
686-
cat._categories = self.categories.take(take_codes)
687-
cat._codes = _recode_for_categories(self.codes,
688-
self.categories,
689-
cat._categories)
690-
return cat
686+
categories = self.categories.take(take_codes)
687+
codes = _recode_for_categories(self.codes,
688+
self.categories,
689+
categories)
690+
691+
# return a new categorical that maps our new codes
692+
# and categories
693+
dtype = CategoricalDtype(categories, ordered=self.ordered)
694+
return type(self)(codes, dtype=dtype, fastpath=True)
691695

692696
# Already sorted according to self.categories; all is fine
693697
if sort:

pandas/core/groupby/groupby.py

+23 -6
Original file line number | Diff line number | Diff line change
@@ -2341,10 +2341,10 @@ def recons_labels(self):
23412341
@cache_readonly
23422342
def result_index(self):
23432343
if not self.compressed and len(self.groupings) == 1:
2344-
return self.groupings[0].group_index.rename(self.names[0])
2344+
return self.groupings[0].result_index.rename(self.names[0])
23452345

23462346
labels = self.recons_labels
2347-
levels = [ping.group_index for ping in self.groupings]
2347+
levels = [ping.result_index for ping in self.groupings]
23482348
result = MultiIndex(levels=levels,
23492349
labels=labels,
23502350
verify_integrity=False,
@@ -2353,12 +2353,12 @@ def result_index(self):
23532353

23542354
def get_group_levels(self):
23552355
if not self.compressed and len(self.groupings) == 1:
2356-
return [self.groupings[0].group_index]
2356+
return [self.groupings[0].result_index]
23572357

23582358
name_list = []
23592359
for ping, labels in zip(self.groupings, self.recons_labels):
23602360
labels = _ensure_platform_int(labels)
2361-
levels = ping.group_index.take(labels)
2361+
levels = ping.result_index.take(labels)
23622362

23632363
name_list.append(levels)
23642364

@@ -2911,6 +2911,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
29112911
self.name = name
29122912
self.level = level
29132913
self.grouper = _convert_grouper(index, grouper)
2914+
self.all_grouper = None
29142915
self.index = index
29152916
self.sort = sort
29162917
self.obj = obj
@@ -2973,7 +2974,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
29732974
warnings.warn(msg, FutureWarning, stacklevel=5)
29742975
observed = False
29752976

2976-
grouper = self.grouper
2977+
self.all_grouper = self.grouper
29772978
self.grouper = self.grouper._codes_for_groupby(
29782979
self.sort, observed)
29792980
categories = self.grouper.categories
@@ -2982,7 +2983,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
29822983
# preserving the categories / ordered attributes
29832984
self._labels = self.grouper.codes
29842985
if observed:
2985-
codes = algorithms.unique1d(grouper.codes)
2986+
codes = algorithms.unique1d(self.grouper.codes)
29862987
else:
29872988
codes = np.arange(len(categories))
29882989

@@ -3049,6 +3050,22 @@ def labels(self):
30493050
self._make_labels()
30503051
return self._labels
30513052

3053+
@cache_readonly
3054+
def result_index(self):
3055+
if self.all_grouper is not None:
3056+
all_categories = self.all_grouper.categories
3057+
3058+
# we re-order to the original category orderings
3059+
if self.sort:
3060+
return self.group_index.set_categories(all_categories)
3061+
3062+
# we are not sorting, so add unobserved to the end
3063+
categories = self.group_index.categories
3064+
return self.group_index.add_categories(
3065+
all_categories[~all_categories.isin(categories)])
3066+
3067+
return self.group_index
3068+
30523069
@property
30533070
def group_index(self):
30543071
if self._group_index is None:

pandas/tests/groupby/test_categorical.py

+17 -14
Original file line number | Diff line number | Diff line change
@@ -705,37 +705,40 @@ def test_sort2():
705705
df['range'] = Categorical(df['range'], ordered=True)
706706
index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]',
707707
'(7.5, 10]'], name='range', ordered=True)
708-
result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]],
709-
columns=['foo', 'bar'], index=index)
708+
expected_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]],
709+
columns=['foo', 'bar'], index=index)
710710

711711
col = 'range'
712-
assert_frame_equal(
713-
result_sort, df.groupby(col, sort=True, observed=False).first())
712+
result_sort = df.groupby(col, sort=True, observed=False).first()
713+
assert_frame_equal(result_sort, expected_sort)
714714

715715
# when categories is ordered, group is ordered by category's order
716-
assert_frame_equal(
717-
result_sort, df.groupby(col, sort=False, observed=False).first())
716+
expected_sort = result_sort
717+
result_sort = df.groupby(col, sort=False, observed=False).first()
718+
assert_frame_equal(result_sort, expected_sort)
718719

719720
df['range'] = Categorical(df['range'], ordered=False)
720721
index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]',
721722
'(7.5, 10]'], name='range')
722-
result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]],
723-
columns=['foo', 'bar'], index=index)
723+
expected_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]],
724+
columns=['foo', 'bar'], index=index)
724725

725726
index = CategoricalIndex(['(7.5, 10]', '(2.5, 5]', '(5, 7.5]',
726727
'(0, 2.5]'],
727728
categories=['(7.5, 10]', '(2.5, 5]',
728729
'(5, 7.5]', '(0, 2.5]'],
729730
name='range')
730-
result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]],
731-
index=index, columns=['foo', 'bar'])
731+
expected_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]],
732+
index=index, columns=['foo', 'bar'])
732733

733734
col = 'range'
735+
734736
# this is an unordered categorical, but we allow this ####
735-
assert_frame_equal(
736-
result_sort, df.groupby(col, sort=True, observed=False).first())
737-
assert_frame_equal(
738-
result_nosort, df.groupby(col, sort=False, observed=False).first())
737+
result_sort = df.groupby(col, sort=True, observed=False).first()
738+
assert_frame_equal(result_sort, expected_sort)
739+
740+
result_nosort = df.groupby(col, sort=False, observed=False).first()
741+
assert_frame_equal(result_nosort, expected_nosort)
739742

740743

741744
def test_sort_datetimelike():

pandas/tests/reshape/test_pivot.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1136,7 +1136,7 @@ def test_categorical_pivot_index_ordering(self, observed):
11361136
index=expected_index,
11371137
columns=expected_columns)
11381138
if not observed:
1139-
result = result.dropna().astype(int)
1139+
result = result.dropna().astype(np.int64)
11401140

11411141
tm.assert_frame_equal(result, expected)
11421142

0 commit comments

Comments
 (0)