@@ -966,11 +966,24 @@ def _cython_operation(
966
966
)
967
967
968
968
@final
969
- def agg_series (self , obj : Series , func : F ) -> ArrayLike :
969
+ def agg_series (
970
+ self , obj : Series , func : F , preserve_dtype : bool = False
971
+ ) -> ArrayLike :
972
+ """
973
+ Parameters
974
+ ----------
975
+ obj : Series
976
+ func : function taking a Series and returning a scalar-like
977
+ preserve_dtype : bool
978
+ Whether the aggregation is known to be dtype-preserving.
979
+
980
+ Returns
981
+ -------
982
+ np.ndarray or ExtensionArray
983
+ """
970
984
# test_groupby_empty_with_category gets here with self.ngroups == 0
971
985
# and len(obj) > 0
972
986
973
- cast_back = True
974
987
if len (obj ) == 0 :
975
988
# SeriesGrouper would raise if we were to call _aggregate_series_fast
976
989
result = self ._aggregate_series_pure_python (obj , func )
@@ -982,17 +995,21 @@ def agg_series(self, obj: Series, func: F) -> ArrayLike:
982
995
# TODO: can we get a performant workaround for EAs backed by ndarray?
983
996
result = self ._aggregate_series_pure_python (obj , func )
984
997
998
+ # we can preserve a little bit more aggressively with EA dtype
999
+ # because maybe_cast_pointwise_result will do a try/except
1000
+ # with _from_sequence. NB we are assuming here that _from_sequence
1001
+ # is sufficiently strict that it casts appropriately.
1002
+ preserve_dtype = True
1003
+
985
1004
elif obj .index ._has_complex_internals :
986
1005
# Preempt TypeError in _aggregate_series_fast
987
1006
result = self ._aggregate_series_pure_python (obj , func )
988
1007
989
1008
else :
990
1009
result = self ._aggregate_series_fast (obj , func )
991
- cast_back = False
992
1010
993
1011
npvalues = lib .maybe_convert_objects (result , try_float = False )
994
- if cast_back :
995
- # TODO: Is there a documented reason why we dont always cast_back?
1012
+ if preserve_dtype :
996
1013
out = maybe_cast_pointwise_result (npvalues , obj .dtype , numeric_only = True )
997
1014
else :
998
1015
out = npvalues
0 commit comments