|
44 | 44 | ensure_int64,
|
45 | 45 | ensure_platform_int,
|
46 | 46 | is_bool,
|
| 47 | + is_categorical_dtype, |
47 | 48 | is_integer_dtype,
|
48 | 49 | is_interval_dtype,
|
49 | 50 | is_numeric_dtype,
|
|
59 | 60 | normalize_keyword_aggregation,
|
60 | 61 | )
|
61 | 62 | import pandas.core.algorithms as algorithms
|
| 63 | +from pandas.core.arrays.categorical import Categorical |
62 | 64 | from pandas.core.base import DataError, SpecificationError
|
63 | 65 | import pandas.core.common as com
|
64 | 66 | from pandas.core.construction import create_series_with_explicit_dtype
|
@@ -1056,9 +1058,22 @@ def _cython_agg_blocks(
|
1056 | 1058 | result = no_result
|
1057 | 1059 | locs = block.mgr_locs.as_array
|
1058 | 1060 | try:
|
1059 |
| - result, _ = self.grouper.aggregate( |
1060 |
| - block.values, how, axis=1, min_count=min_count |
1061 |
| - ) |
| 1061 | + |
| 1062 | + if is_categorical_dtype(block.values) and how in ('first', 'last'): |
| 1063 | + # perf improvement: aggregate categorical codes with fast cython agg |
| 1064 | + # for select `how` operations |
| 1065 | + result_codes, _ = self.grouper.aggregate( |
| 1066 | + block.values.codes, how, axis=1, min_count=min_count |
| 1067 | + ) |
| 1068 | + result = Categorical.from_codes( |
| 1069 | + result_codes, |
| 1070 | + categories=block.values.categories, |
| 1071 | + ordered=block.values.ordered, |
| 1072 | + ) |
| 1073 | + else: |
| 1074 | + result, _ = self.grouper.aggregate( |
| 1075 | + block.values, how, axis=1, min_count=min_count |
| 1076 | + ) |
1062 | 1077 | except NotImplementedError:
|
1063 | 1078 | # generally if we have numeric_only=False
|
1064 | 1079 | # and non-applicable functions
|
|
0 commit comments