Skip to content

CLN: Break up wrap applied output #36536

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 22, 2020
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 69 additions & 56 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1210,64 +1210,77 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
self._insert_inaxis_grouper_inplace(result)
return result
else:
# this is to silence a DeprecationWarning
# TODO: Remove when default dtype of empty Series is object
kwargs = first_not_none._construct_axes_dict()
backup = create_series_with_explicit_dtype(dtype_if_empty=object, **kwargs)
values = [x if (x is not None) else backup for x in values]

all_indexed_same = all_indexes_same(x.index for x in values)

# GH3596
# provide a reduction (Frame -> Series) if groups are
# unique
if self.squeeze:
applied_index = self._selected_obj._get_axis(self.axis)
singular_series = len(values) == 1 and applied_index.nlevels == 1

# assign the name to this series
if singular_series:
values[0].name = keys[0]

# GH2893
# we have series in the values array, we want to
# produce a series:
# if any of the sub-series are not indexed the same
# OR we don't have a multi-index and we have only a
# single values
return self._concat_objects(
keys, values, not_indexed_same=not_indexed_same
)
# values are Series
return self.wrap_applied_output_series(
keys, values, not_indexed_same, first_not_none, key_index
)

# still a series
# path added as of GH 5545
elif all_indexed_same:
from pandas.core.reshape.concat import concat

return concat(values)

if not all_indexed_same:
# GH 8467
return self._concat_objects(keys, values, not_indexed_same=True)

# Combine values
# vstack+constructor is faster than concat and handles MI-columns
stacked_values = np.vstack([np.asarray(v) for v in values])

if self.axis == 0:
index = key_index
columns = first_not_none.index.copy()
if columns.name is None:
# GH6124 - propagate name of Series when it's consistent
names = {v.name for v in values}
if len(names) == 1:
columns.name = list(names)[0]
else:
index = first_not_none.index
columns = key_index
stacked_values = stacked_values.T
def _wrap_applied_output_series(
self,
keys,
values: List[Series],
not_indexed_same: bool,
first_not_none,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ideally if you can type the args (certainly can be a followon)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking this too, but wasn't certain of types. We can't rely on mypy to catch improper typing here because this is being called from untyped code. I can guess at types and run tests with assert isinstance(var, guess) to confirm the guess, and rely on good test coverage. Is that an appropriate way to approach here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure u can try that
ultimately we want to type all the way up and have mypy validate - but of course takes time and effort to get there

key_index,
) -> FrameOrSeriesUnion:
# this is to silence a DeprecationWarning
# TODO: Remove when default dtype of empty Series is object
kwargs = first_not_none._construct_axes_dict()
backup = create_series_with_explicit_dtype(dtype_if_empty=object, **kwargs)
values = [x if (x is not None) else backup for x in values]

all_indexed_same = all_indexes_same(x.index for x in values)

# GH3596
# provide a reduction (Frame -> Series) if groups are
# unique
if self.squeeze:
applied_index = self._selected_obj._get_axis(self.axis)
singular_series = len(values) == 1 and applied_index.nlevels == 1

# assign the name to this series
if singular_series:
values[0].name = keys[0]

# GH2893
# we have series in the values array, we want to
# produce a series:
# if any of the sub-series are not indexed the same
# OR we don't have a multi-index and we have only a
# single values
return self._concat_objects(
keys, values, not_indexed_same=not_indexed_same
)

# still a series
# path added as of GH 5545
elif all_indexed_same:
from pandas.core.reshape.concat import concat

return concat(values)

if not all_indexed_same:
# GH 8467
return self._concat_objects(keys, values, not_indexed_same=True)

# Combine values
# vstack+constructor is faster than concat and handles MI-columns
stacked_values = np.vstack([np.asarray(v) for v in values])

if self.axis == 0:
index = key_index
columns = first_not_none.index.copy()
if columns.name is None:
# GH6124 - propagate name of Series when it's consistent
names = {v.name for v in values}
if len(names) == 1:
columns.name = list(names)[0]
else:
index = first_not_none.index
columns = key_index
stacked_values = stacked_values.T

result = self.obj._constructor(stacked_values, index=index, columns=columns)
result = self.obj._constructor(stacked_values, index=index, columns=columns)

# if we have date/time like in the original, then coerce dates
# as we are stacking can easily have object dtypes here
Expand Down