Skip to content

Commit 60f5d8b

Browse files
authored
REF: document casting behavior in groupby (#41376)
1 parent 93fb9d9 commit 60f5d8b

File tree

3 files changed

+29
-13
lines changed

3 files changed

+29
-13
lines changed

pandas/core/groupby/generic.py

+3-7
Original file line numberDiff line numberDiff line change
@@ -539,9 +539,6 @@ def _transform_general(self, func: Callable, *args, **kwargs) -> Series:
539539
object.__setattr__(group, "name", name)
540540
res = func(group, *args, **kwargs)
541541

542-
if isinstance(res, (DataFrame, Series)):
543-
res = res._values
544-
545542
results.append(klass(res, index=group.index))
546543

547544
# check for empty "results" to avoid concat ValueError
@@ -1251,12 +1248,11 @@ def _wrap_applied_output_series(
12511248
columns = key_index
12521249
stacked_values = stacked_values.T
12531250

1251+
if stacked_values.dtype == object:
1252+
# We'll have the DataFrame constructor do inference
1253+
stacked_values = stacked_values.tolist()
12541254
result = self.obj._constructor(stacked_values, index=index, columns=columns)
12551255

1256-
# if we have date/time like in the original, then coerce dates
1257-
# as we are stacking can easily have object dtypes here
1258-
result = result._convert(datetime=True)
1259-
12601256
if not self.as_index:
12611257
self._insert_inaxis_grouper_inplace(result)
12621258

pandas/core/groupby/groupby.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1329,7 +1329,10 @@ def _agg_py_fallback(
13291329
# reductions; see GH#28949
13301330
ser = df.iloc[:, 0]
13311331

1332-
res_values = self.grouper.agg_series(ser, alt)
1332+
# We do not get here with UDFs, so we know that our dtype
1333+
# should always be preserved by the implemented aggregations
1334+
# TODO: Is this exactly right; see WrappedCythonOp get_result_dtype?
1335+
res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True)
13331336

13341337
if isinstance(values, Categorical):
13351338
# Because we only get here with known dtype-preserving

pandas/core/groupby/ops.py

+22-5
Original file line numberDiff line numberDiff line change
@@ -966,11 +966,24 @@ def _cython_operation(
966966
)
967967

968968
@final
969-
def agg_series(self, obj: Series, func: F) -> ArrayLike:
969+
def agg_series(
970+
self, obj: Series, func: F, preserve_dtype: bool = False
971+
) -> ArrayLike:
972+
"""
973+
Parameters
974+
----------
975+
obj : Series
976+
func : function taking a Series and returning a scalar-like
977+
preserve_dtype : bool
978+
Whether the aggregation is known to be dtype-preserving.
979+
980+
Returns
981+
-------
982+
np.ndarray or ExtensionArray
983+
"""
970984
# test_groupby_empty_with_category gets here with self.ngroups == 0
971985
# and len(obj) > 0
972986

973-
cast_back = True
974987
if len(obj) == 0:
975988
# SeriesGrouper would raise if we were to call _aggregate_series_fast
976989
result = self._aggregate_series_pure_python(obj, func)
@@ -982,17 +995,21 @@ def agg_series(self, obj: Series, func: F) -> ArrayLike:
982995
# TODO: can we get a performant workaround for EAs backed by ndarray?
983996
result = self._aggregate_series_pure_python(obj, func)
984997

998+
# we can preserve a little bit more aggressively with EA dtype
999+
# because maybe_cast_pointwise_result will do a try/except
1000+
# with _from_sequence. NB we are assuming here that _from_sequence
1001+
# is sufficiently strict that it casts appropriately.
1002+
preserve_dtype = True
1003+
9851004
elif obj.index._has_complex_internals:
9861005
# Preempt TypeError in _aggregate_series_fast
9871006
result = self._aggregate_series_pure_python(obj, func)
9881007

9891008
else:
9901009
result = self._aggregate_series_fast(obj, func)
991-
cast_back = False
9921010

9931011
npvalues = lib.maybe_convert_objects(result, try_float=False)
994-
if cast_back:
995-
# TODO: Is there a documented reason why we dont always cast_back?
1012+
if preserve_dtype:
9961013
out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
9971014
else:
9981015
out = npvalues

0 commit comments

Comments (0)