Skip to content

CLN: _wrap_applied_output #36160

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 6, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 91 additions & 100 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1192,113 +1192,104 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
return self.obj._constructor()
elif isinstance(first_not_none, DataFrame):
return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
else:
key_index = self.grouper.result_index if self.as_index else None

if isinstance(first_not_none, Series):
# this is to silence a DeprecationWarning
# TODO: Remove when default dtype of empty Series is object
kwargs = first_not_none._construct_axes_dict()
backup = create_series_with_explicit_dtype(
dtype_if_empty=object, **kwargs
)

values = [x if (x is not None) else backup for x in values]

v = values[0]

if not isinstance(v, (np.ndarray, Index, Series)) and self.as_index:
# values are not series or array-like but scalars
# self._selection_name not passed through to Series as the
# result should not take the name of original selection
# of columns
return self.obj._constructor_sliced(values, index=key_index)
key_index = self.grouper.result_index if self.as_index else None

if isinstance(first_not_none, Series):
# this is to silence a DeprecationWarning
# TODO: Remove when default dtype of empty Series is object
kwargs = first_not_none._construct_axes_dict()
backup = create_series_with_explicit_dtype(dtype_if_empty=object, **kwargs)

values = [x if (x is not None) else backup for x in values]

v = values[0]

if not isinstance(v, (np.ndarray, Index, Series)) and self.as_index:
# values are not series or array-like but scalars
# self._selection_name not passed through to Series as the
# result should not take the name of original selection
# of columns
return self.obj._constructor_sliced(values, index=key_index)

if isinstance(v, Series):
all_indexed_same = all_indexes_same((x.index for x in values))

# GH3596
# provide a reduction (Frame -> Series) if groups are
# unique
if self.squeeze:
applied_index = self._selected_obj._get_axis(self.axis)
singular_series = len(values) == 1 and applied_index.nlevels == 1

# assign the name to this series
if singular_series:
values[0].name = keys[0]

# GH2893
# we have series in the values array, we want to
# produce a series:
# if any of the sub-series are not indexed the same
# OR we don't have a multi-index and we have only a
# single values
return self._concat_objects(
keys, values, not_indexed_same=not_indexed_same
)

# still a series
# path added as of GH 5545
elif all_indexed_same:
from pandas.core.reshape.concat import concat

return concat(values)

if not all_indexed_same:
# GH 8467
return self._concat_objects(keys, values, not_indexed_same=True)

# Combine values
# vstack+constructor is faster than concat and handles MI-columns
stacked_values = np.vstack([np.asarray(v) for v in values])

if self.axis == 0:
index = key_index
columns = v.index.copy()
if columns.name is None:
# GH6124 - propagate name of Series when it's consistent
names = {v.name for v in values}
if len(names) == 1:
columns.name = list(names)[0]
else:
if isinstance(v, Series):
all_indexed_same = all_indexes_same((x.index for x in values))

# GH3596
# provide a reduction (Frame -> Series) if groups are
# unique
if self.squeeze:
applied_index = self._selected_obj._get_axis(self.axis)
singular_series = (
len(values) == 1 and applied_index.nlevels == 1
)

# assign the name to this series
if singular_series:
values[0].name = keys[0]

# GH2893
# we have series in the values array, we want to
# produce a series:
# if any of the sub-series are not indexed the same
# OR we don't have a multi-index and we have only a
# single values
return self._concat_objects(
keys, values, not_indexed_same=not_indexed_same
)

# still a series
# path added as of GH 5545
elif all_indexed_same:
from pandas.core.reshape.concat import concat

return concat(values)

if not all_indexed_same:
# GH 8467
return self._concat_objects(keys, values, not_indexed_same=True)

# Combine values
# vstack+constructor is faster than concat and handles MI-columns
stacked_values = np.vstack([np.asarray(v) for v in values])

if self.axis == 0:
index = key_index
columns = v.index.copy()
if columns.name is None:
# GH6124 - propagate name of Series when it's consistent
names = {v.name for v in values}
if len(names) == 1:
columns.name = list(names)[0]
else:
index = v.index
columns = key_index
stacked_values = stacked_values.T

result = self.obj._constructor(
stacked_values, index=index, columns=columns
)
index = v.index
columns = key_index
stacked_values = stacked_values.T

elif not self.as_index:
# We add grouping column below, so create a frame here
result = DataFrame(
values, index=key_index, columns=[self._selection]
)
else:
# GH#1738: values is list of arrays of unequal lengths
# fall through to the outer else clause
# TODO: sure this is right? we used to do this
# after raising AttributeError above
return self.obj._constructor_sliced(
values, index=key_index, name=self._selection_name
)
result = self.obj._constructor(stacked_values, index=index, columns=columns)

# if we have date/time like in the original, then coerce dates
# as we are stacking can easily have object dtypes here
so = self._selected_obj
if so.ndim == 2 and so.dtypes.apply(needs_i8_conversion).any():
result = _recast_datetimelike_result(result)
else:
result = result._convert(datetime=True)
elif not self.as_index:
# We add grouping column below, so create a frame here
result = DataFrame(values, index=key_index, columns=[self._selection])
else:
# GH#1738: values is list of arrays of unequal lengths
# fall through to the outer else clause
# TODO: sure this is right? we used to do this
# after raising AttributeError above
return self.obj._constructor_sliced(
values, index=key_index, name=self._selection_name
)

# if we have date/time like in the original, then coerce dates
# as we are stacking can easily have object dtypes here
so = self._selected_obj
if so.ndim == 2 and so.dtypes.apply(needs_i8_conversion).any():
result = _recast_datetimelike_result(result)
else:
result = result._convert(datetime=True)

if not self.as_index:
self._insert_inaxis_grouper_inplace(result)
if not self.as_index:
self._insert_inaxis_grouper_inplace(result)

return self._reindex_output(result)
return self._reindex_output(result)

def _transform_general(
self, func, *args, engine="cython", engine_kwargs=None, **kwargs
Expand Down