CLN: remove unreachable code in pandas/core/groupby/generic.py::DataFrameGroupBy::_transform_general #32583
@@ -1202,158 +1202,147 @@ def first_not_none(values):

            # GH9684. If all values are None, then this will throw an error.
            # We'd prefer it return an empty dataframe.
            return DataFrame()
        elif isinstance(v, DataFrame):
            return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
-       elif self.grouper.groupings is not None:

(The two branches above both return, so the new version removes the elif wrapper and
de-indents everything below by four spaces. The code is shown once, at its new
indentation; whitespace-only changes are not marked.)

        if len(self.grouper.groupings) > 1:
[Review comment on the line above] is this possible to be None?

[Author reply] For […] and […]. So, it still cannot be None. I'm still making sense of all this group-by code though, so I'll still look to see if there's anything I've missed.
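A minimal interactive check of that claim (an illustrative sketch against the pandas 1.x API, where the internal BaseGrouper is reachable via the `.grouper` accessor):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 1, 2], "b": [3.0, 4.0, 5.0]})
    gb = df.groupby("a")

    # get_grouper always builds at least one Grouping for a DataFrameGroupBy,
    # so this is a populated list, never None.
    groupings = gb.grouper.groupings
    print(type(groupings), len(groupings))  # <class 'list'> 1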
            key_index = self.grouper.result_index

        else:
            ping = self.grouper.groupings[0]
            if len(keys) == ping.ngroups:
                key_index = ping.group_index
                key_index.name = key_names[0]

                key_lookup = Index(keys)
                indexer = key_lookup.get_indexer(key_index)

                # reorder the values
                values = [values[i] for i in indexer]
            else:

                key_index = Index(keys, name=key_names[0])

            # don't use the key indexer
            if not self.as_index:
                key_index = None

        # make Nones an empty object
        v = first_not_none(values)
        if v is None:
            return DataFrame()
        elif isinstance(v, NDFrame):

            # this is to silence a DeprecationWarning
            # TODO: Remove when default dtype of empty Series is object
            kwargs = v._construct_axes_dict()
            if v._constructor is Series:
                backup = create_series_with_explicit_dtype(
                    **kwargs, dtype_if_empty=object
                )
            else:
                backup = v._constructor(**kwargs)

            values = [x if (x is not None) else backup for x in values]

        v = values[0]

        if isinstance(v, (np.ndarray, Index, Series)):
            if isinstance(v, Series):
                applied_index = self._selected_obj._get_axis(self.axis)
                all_indexed_same = all_indexes_same([x.index for x in values])
                singular_series = len(values) == 1 and applied_index.nlevels == 1

                # GH3596
                # provide a reduction (Frame -> Series) if groups are
                # unique
                if self.squeeze:
                    # assign the name to this series
                    if singular_series:
                        values[0].name = keys[0]

                        # GH2893
                        # we have series in the values array, we want to
                        # produce a series:
                        # if any of the sub-series are not indexed the same
                        # OR we don't have a multi-index and we have only a
                        # single values
                        return self._concat_objects(
                            keys, values, not_indexed_same=not_indexed_same
                        )

                    # still a series
                    # path added as of GH 5545
                    elif all_indexed_same:
                        from pandas.core.reshape.concat import concat

                        return concat(values)

                if not all_indexed_same:
                    # GH 8467
                    return self._concat_objects(keys, values, not_indexed_same=True)

            if self.axis == 0 and isinstance(v, ABCSeries):
                # GH6124 if the list of Series have a consistent name,
                # then propagate that name to the result.
                index = v.index.copy()
                if index.name is None:
                    # Only propagate the series name to the result
                    # if all series have a consistent name. If the
                    # series do not have a consistent name, do
                    # nothing.
                    names = {v.name for v in values}
                    if len(names) == 1:
                        index.name = list(names)[0]

                # normally use vstack as its faster than concat
                # and if we have mi-columns
                if (
                    isinstance(v.index, MultiIndex)
                    or key_index is None
                    or isinstance(key_index, MultiIndex)
                ):
                    stacked_values = np.vstack([np.asarray(v) for v in values])
-                   result = DataFrame(
-                       stacked_values, index=key_index, columns=index
-                   )
+                   result = DataFrame(stacked_values, index=key_index, columns=index)
                else:
                    # GH5788 instead of stacking; concat gets the
                    # dtypes correct
                    from pandas.core.reshape.concat import concat

-                   result = concat(
-                       values,
-                       keys=key_index,
-                       names=key_index.names,
-                       axis=self.axis,
-                   ).unstack()
+                   result = concat(
+                       values, keys=key_index, names=key_index.names, axis=self.axis,
+                   ).unstack()
                    result.columns = index
            elif isinstance(v, ABCSeries):
                stacked_values = np.vstack([np.asarray(v) for v in values])
-               result = DataFrame(
-                   stacked_values.T, index=v.index, columns=key_index
-               )
+               result = DataFrame(stacked_values.T, index=v.index, columns=key_index)
            else:
                # GH#1738: values is list of arrays of unequal lengths
                # fall through to the outer else clause
                # TODO: sure this is right? we used to do this
                # after raising AttributeError above
                return Series(values, index=key_index, name=self._selection_name)

            # if we have date/time like in the original, then coerce dates
            # as we are stacking can easily have object dtypes here
            so = self._selected_obj
            if so.ndim == 2 and so.dtypes.apply(needs_i8_conversion).any():
                result = _recast_datetimelike_result(result)
            else:
                result = result._convert(datetime=True)

            return self._reindex_output(result)

        # values are not series or array-like but scalars
        else:
            # only coerce dates if we find at least 1 datetime
            should_coerce = any(isinstance(x, Timestamp) for x in values)
            # self._selection_name not passed through to Series as the
            # result should not take the name of original selection
            # of columns
            return Series(values, index=key_index)._convert(
                datetime=True, coerce=should_coerce
            )
-       else:
-           # Handle cases like BinGrouper
-           return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)

    def _transform_general(self, func, *args, **kwargs):
        from pandas.core.reshape.concat import concat
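For context, the hunk above sits on the groupby(...).apply(...) wrap-up path, which assembles per-group Series into a DataFrame indexed by the group keys. A minimal sketch of the case it handles (illustrative only, using just the public API):

    import pandas as pd

    df = pd.DataFrame({"key": ["a", "a", "b"], "x": [1.0, 2.0, 3.0]})

    # Each group produces a Series with a shared index ("count", "mean", ...),
    # so the wrap-up logic aligns them into a DataFrame whose rows are the keys.
    out = df.groupby("key").apply(lambda g: g["x"].describe())
    print(out.loc["a", "mean"])  # 1.5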
[Review comment] Little hard to tell from the diff - is removing this branch the only change?

[Author reply] I also changed the `elif`s after the `return` statements to `if`; this was only on line 1206 (new version). However, I've reverted that change. Then, I removed the `elif self.grouper.groupings is not None:` check so I could shift the block after it back by 4 spaces. Shall I only remove the last branch if that makes it easier to review?
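As a standalone sketch of the pattern under discussion (hypothetical toy code, not from the PR): when an `elif` guard is always true, its `else` is dead code, and because every earlier branch returns, the guarded block can be de-indented without changing behavior:

    def before(v, groupings):
        if v is None:
            return "empty"
        elif isinstance(v, dict):
            return "concat"
        elif groupings is not None:  # always true in practice
            return "main path"
        else:
            return "unreachable"  # dead branch, analogous to the one removed here


    def after(v, groupings):
        if v is None:
            return "empty"
        elif isinstance(v, dict):
            return "concat"

        # Both branches above return, so the main path can sit one level up.
        return "main path"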