
CLN: remove unreachable code in pandas/core/groupby/generic.py::DataFrameGroupBy::_transform_general #32583

Closed

261 changes: 125 additions & 136 deletions pandas/core/groupby/generic.py
@@ -1202,158 +1202,147 @@ def first_not_none(values):
# GH9684. If all values are None, then this will throw an error.
# We'd prefer it return an empty dataframe.
return DataFrame()

elif isinstance(v, DataFrame):
return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
elif self.grouper.groupings is not None:
Member:

Little hard to tell from the diff - is removing this branch the only change?

MarcoGorelli (Member Author) commented on Mar 14, 2020:

I also changed the elifs after the return statements to if. This was only on line 1206 (new version). However, I've reverted that change.

Then, I removed

    elif self.grouper.groupings is not None:

so I could shift the block after it back by 4 spaces.

Shall I only remove the last branch if that makes it easier to review?
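
For illustration, here is a minimal, self-contained sketch of the two simplifications under discussion (hypothetical before/after functions with placeholder branch bodies, not the pandas source):

    def before(v, groupings):
        if v is None:
            return "empty frame"
        elif isinstance(v, dict):       # elif is redundant here: the
            return "concatenated"       # branch above already returned
        elif groupings is not None:     # always true in practice, so the
            return "keyed result"       # body sits one level too deep

    def after(v, groupings):
        if v is None:
            return "empty frame"
        if isinstance(v, dict):         # plain `if` reads the same once
            return "concatenated"       # every earlier branch returns
        # guard dropped: groupings is never None (see the thread below),
        # so this block shifts back by 4 spaces
        return "keyed result"

    # Behaviour is identical for every input.
    for args in [(None, []), ({}, []), (1, [])]:
        assert before(*args) == after(*args)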

if len(self.grouper.groupings) > 1:
key_index = self.grouper.result_index

else:
ping = self.grouper.groupings[0]
if len(keys) == ping.ngroups:
key_index = ping.group_index
key_index.name = key_names[0]

key_lookup = Index(keys)
indexer = key_lookup.get_indexer(key_index)
if len(self.grouper.groupings) > 1:
Contributor:

is this possible to be None?

MarcoGorelli (Member Author) commented on Mar 14, 2020:

For BaseGrouper, .groupings needs to be a list, so in that case, no, as inside its __init__ method there is

        self._groupings: List[grouper.Grouping] = list(groupings)

and .groupings simply returns ._groupings.

BinGrouper inherits from BaseGrouper, but changes its .groupings definition to

    @property
    def groupings(self) -> "List[grouper.Grouping]":
        return [
            grouper.Grouping(lvl, lvl, in_axis=False, level=None, name=name)
            for lvl, name in zip(self.levels, self.names)
        ]

So, it still cannot be None.

I'm still making sense of all this group-by code, though, so I'll keep looking to see if there's anything I've missed.
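
As a quick sanity check of the claim above, a sketch along these lines could be run. Note that it reaches into .grouper, an internal attribute as it existed around pandas 1.0 when this PR was open; later versions rename or deprecate these internals:

    import pandas as pd

    # Ordinary groupby -> BaseGrouper, whose __init__ coerces the
    # groupings argument to a list, so it can never be None.
    df = pd.DataFrame({"a": [1, 1, 2], "b": [10, 20, 30]})
    gb = df.groupby("a")
    assert isinstance(gb.grouper.groupings, list)

    # resample -> BinGrouper, whose groupings property rebuilds a list
    # from self.levels and self.names on every access.
    ts = pd.Series(range(3), index=pd.date_range("2020-01-01", periods=3))
    rs = ts.resample("2D")
    assert isinstance(rs.grouper.groupings, list)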

key_index = self.grouper.result_index
else:
ping = self.grouper.groupings[0]
if len(keys) == ping.ngroups:
key_index = ping.group_index
key_index.name = key_names[0]

# reorder the values
values = [values[i] for i in indexer]
else:
key_lookup = Index(keys)
indexer = key_lookup.get_indexer(key_index)

key_index = Index(keys, name=key_names[0])
# reorder the values
values = [values[i] for i in indexer]
else:

# don't use the key indexer
if not self.as_index:
key_index = None
key_index = Index(keys, name=key_names[0])

# make Nones an empty object
v = first_not_none(values)
if v is None:
return DataFrame()
elif isinstance(v, NDFrame):
# don't use the key indexer
if not self.as_index:
key_index = None

# this is to silence a DeprecationWarning
# TODO: Remove when default dtype of empty Series is object
kwargs = v._construct_axes_dict()
if v._constructor is Series:
backup = create_series_with_explicit_dtype(
**kwargs, dtype_if_empty=object
)
else:
backup = v._constructor(**kwargs)

values = [x if (x is not None) else backup for x in values]

v = values[0]

if isinstance(v, (np.ndarray, Index, Series)):
if isinstance(v, Series):
applied_index = self._selected_obj._get_axis(self.axis)
all_indexed_same = all_indexes_same([x.index for x in values])
singular_series = len(values) == 1 and applied_index.nlevels == 1

# GH3596
# provide a reduction (Frame -> Series) if groups are
# unique
if self.squeeze:
# assign the name to this series
if singular_series:
values[0].name = keys[0]

# GH2893
# we have series in the values array, we want to
# produce a series:
# if any of the sub-series are not indexed the same
# OR we don't have a multi-index and we have only a
# single values
return self._concat_objects(
keys, values, not_indexed_same=not_indexed_same
)

# still a series
# path added as of GH 5545
elif all_indexed_same:
from pandas.core.reshape.concat import concat

return concat(values)

if not all_indexed_same:
# GH 8467
return self._concat_objects(keys, values, not_indexed_same=True)

if self.axis == 0 and isinstance(v, ABCSeries):
# GH6124 if the list of Series have a consistent name,
# then propagate that name to the result.
index = v.index.copy()
if index.name is None:
# Only propagate the series name to the result
# if all series have a consistent name. If the
# series do not have a consistent name, do
# nothing.
names = {v.name for v in values}
if len(names) == 1:
index.name = list(names)[0]

# normally use vstack as its faster than concat
# and if we have mi-columns
if (
isinstance(v.index, MultiIndex)
or key_index is None
or isinstance(key_index, MultiIndex)
):
stacked_values = np.vstack([np.asarray(v) for v in values])
result = DataFrame(
stacked_values, index=key_index, columns=index
# make Nones an empty object
v = first_not_none(values)
if v is None:
return DataFrame()
elif isinstance(v, NDFrame):

# this is to silence a DeprecationWarning
# TODO: Remove when default dtype of empty Series is object
kwargs = v._construct_axes_dict()
if v._constructor is Series:
backup = create_series_with_explicit_dtype(
**kwargs, dtype_if_empty=object
)
else:
backup = v._constructor(**kwargs)

values = [x if (x is not None) else backup for x in values]

v = values[0]

if isinstance(v, (np.ndarray, Index, Series)):
if isinstance(v, Series):
applied_index = self._selected_obj._get_axis(self.axis)
all_indexed_same = all_indexes_same([x.index for x in values])
singular_series = len(values) == 1 and applied_index.nlevels == 1

# GH3596
# provide a reduction (Frame -> Series) if groups are
# unique
if self.squeeze:
# assign the name to this series
if singular_series:
values[0].name = keys[0]

# GH2893
# we have series in the values array, we want to
# produce a series:
# if any of the sub-series are not indexed the same
# OR we don't have a multi-index and we have only a
# single values
return self._concat_objects(
keys, values, not_indexed_same=not_indexed_same
)
else:
# GH5788 instead of stacking; concat gets the
# dtypes correct

# still a series
# path added as of GH 5545
elif all_indexed_same:
from pandas.core.reshape.concat import concat

result = concat(
values,
keys=key_index,
names=key_index.names,
axis=self.axis,
).unstack()
result.columns = index
elif isinstance(v, ABCSeries):
return concat(values)

if not all_indexed_same:
# GH 8467
return self._concat_objects(keys, values, not_indexed_same=True)

if self.axis == 0 and isinstance(v, ABCSeries):
# GH6124 if the list of Series have a consistent name,
# then propagate that name to the result.
index = v.index.copy()
if index.name is None:
# Only propagate the series name to the result
# if all series have a consistent name. If the
# series do not have a consistent name, do
# nothing.
names = {v.name for v in values}
if len(names) == 1:
index.name = list(names)[0]

# normally use vstack as its faster than concat
# and if we have mi-columns
if (
isinstance(v.index, MultiIndex)
or key_index is None
or isinstance(key_index, MultiIndex)
):
stacked_values = np.vstack([np.asarray(v) for v in values])
result = DataFrame(
stacked_values.T, index=v.index, columns=key_index
)
result = DataFrame(stacked_values, index=key_index, columns=index)
else:
# GH#1738: values is list of arrays of unequal lengths
# fall through to the outer else clause
# TODO: sure this is right? we used to do this
# after raising AttributeError above
return Series(values, index=key_index, name=self._selection_name)

# if we have date/time like in the original, then coerce dates
# as we are stacking can easily have object dtypes here
so = self._selected_obj
if so.ndim == 2 and so.dtypes.apply(needs_i8_conversion).any():
result = _recast_datetimelike_result(result)
else:
result = result._convert(datetime=True)

return self._reindex_output(result)

# values are not series or array-like but scalars
# GH5788 instead of stacking; concat gets the
# dtypes correct
from pandas.core.reshape.concat import concat

result = concat(
values, keys=key_index, names=key_index.names, axis=self.axis,
).unstack()
result.columns = index
elif isinstance(v, ABCSeries):
stacked_values = np.vstack([np.asarray(v) for v in values])
result = DataFrame(stacked_values.T, index=v.index, columns=key_index)
else:
# only coerce dates if we find at least 1 datetime
should_coerce = any(isinstance(x, Timestamp) for x in values)
# self._selection_name not passed through to Series as the
# result should not take the name of original selection
# of columns
return Series(values, index=key_index)._convert(
datetime=True, coerce=should_coerce
)
# GH#1738: values is list of arrays of unequal lengths
# fall through to the outer else clause
# TODO: sure this is right? we used to do this
# after raising AttributeError above
return Series(values, index=key_index, name=self._selection_name)

# if we have date/time like in the original, then coerce dates
# as we are stacking can easily have object dtypes here
so = self._selected_obj
if so.ndim == 2 and so.dtypes.apply(needs_i8_conversion).any():
result = _recast_datetimelike_result(result)
else:
result = result._convert(datetime=True)

return self._reindex_output(result)

# values are not series or array-like but scalars
else:
# Handle cases like BinGrouper
return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
# only coerce dates if we find at least 1 datetime
should_coerce = any(isinstance(x, Timestamp) for x in values)
# self._selection_name not passed through to Series as the
# result should not take the name of original selection
# of columns
return Series(values, index=key_index)._convert(
datetime=True, coerce=should_coerce
)

def _transform_general(self, func, *args, **kwargs):
from pandas.core.reshape.concat import concat