diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 4102b8527b6aa..a7cee5106af30 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1202,158 +1202,147 @@ def first_not_none(values): # GH9684. If all values are None, then this will throw an error. # We'd prefer it return an empty dataframe. return DataFrame() + elif isinstance(v, DataFrame): return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) - elif self.grouper.groupings is not None: - if len(self.grouper.groupings) > 1: - key_index = self.grouper.result_index - - else: - ping = self.grouper.groupings[0] - if len(keys) == ping.ngroups: - key_index = ping.group_index - key_index.name = key_names[0] - key_lookup = Index(keys) - indexer = key_lookup.get_indexer(key_index) + if len(self.grouper.groupings) > 1: + key_index = self.grouper.result_index + else: + ping = self.grouper.groupings[0] + if len(keys) == ping.ngroups: + key_index = ping.group_index + key_index.name = key_names[0] - # reorder the values - values = [values[i] for i in indexer] - else: + key_lookup = Index(keys) + indexer = key_lookup.get_indexer(key_index) - key_index = Index(keys, name=key_names[0]) + # reorder the values + values = [values[i] for i in indexer] + else: - # don't use the key indexer - if not self.as_index: - key_index = None + key_index = Index(keys, name=key_names[0]) - # make Nones an empty object - v = first_not_none(values) - if v is None: - return DataFrame() - elif isinstance(v, NDFrame): + # don't use the key indexer + if not self.as_index: + key_index = None - # this is to silence a DeprecationWarning - # TODO: Remove when default dtype of empty Series is object - kwargs = v._construct_axes_dict() - if v._constructor is Series: - backup = create_series_with_explicit_dtype( - **kwargs, dtype_if_empty=object - ) - else: - backup = v._constructor(**kwargs) - - values = [x if (x is not None) else backup for x in values] - - v = values[0] - - if isinstance(v, (np.ndarray, Index, Series)): - if isinstance(v, Series): - applied_index = self._selected_obj._get_axis(self.axis) - all_indexed_same = all_indexes_same([x.index for x in values]) - singular_series = len(values) == 1 and applied_index.nlevels == 1 - - # GH3596 - # provide a reduction (Frame -> Series) if groups are - # unique - if self.squeeze: - # assign the name to this series - if singular_series: - values[0].name = keys[0] - - # GH2893 - # we have series in the values array, we want to - # produce a series: - # if any of the sub-series are not indexed the same - # OR we don't have a multi-index and we have only a - # single values - return self._concat_objects( - keys, values, not_indexed_same=not_indexed_same - ) - - # still a series - # path added as of GH 5545 - elif all_indexed_same: - from pandas.core.reshape.concat import concat - - return concat(values) - - if not all_indexed_same: - # GH 8467 - return self._concat_objects(keys, values, not_indexed_same=True) - - if self.axis == 0 and isinstance(v, ABCSeries): - # GH6124 if the list of Series have a consistent name, - # then propagate that name to the result. - index = v.index.copy() - if index.name is None: - # Only propagate the series name to the result - # if all series have a consistent name. If the - # series do not have a consistent name, do - # nothing. - names = {v.name for v in values} - if len(names) == 1: - index.name = list(names)[0] - - # normally use vstack as its faster than concat - # and if we have mi-columns - if ( - isinstance(v.index, MultiIndex) - or key_index is None - or isinstance(key_index, MultiIndex) - ): - stacked_values = np.vstack([np.asarray(v) for v in values]) - result = DataFrame( - stacked_values, index=key_index, columns=index + # make Nones an empty object + v = first_not_none(values) + if v is None: + return DataFrame() + elif isinstance(v, NDFrame): + + # this is to silence a DeprecationWarning + # TODO: Remove when default dtype of empty Series is object + kwargs = v._construct_axes_dict() + if v._constructor is Series: + backup = create_series_with_explicit_dtype( + **kwargs, dtype_if_empty=object + ) + else: + backup = v._constructor(**kwargs) + + values = [x if (x is not None) else backup for x in values] + + v = values[0] + + if isinstance(v, (np.ndarray, Index, Series)): + if isinstance(v, Series): + applied_index = self._selected_obj._get_axis(self.axis) + all_indexed_same = all_indexes_same([x.index for x in values]) + singular_series = len(values) == 1 and applied_index.nlevels == 1 + + # GH3596 + # provide a reduction (Frame -> Series) if groups are + # unique + if self.squeeze: + # assign the name to this series + if singular_series: + values[0].name = keys[0] + + # GH2893 + # we have series in the values array, we want to + # produce a series: + # if any of the sub-series are not indexed the same + # OR we don't have a multi-index and we have only a + # single values + return self._concat_objects( + keys, values, not_indexed_same=not_indexed_same ) - else: - # GH5788 instead of stacking; concat gets the - # dtypes correct + + # still a series + # path added as of GH 5545 + elif all_indexed_same: from pandas.core.reshape.concat import concat - result = concat( - values, - keys=key_index, - names=key_index.names, - axis=self.axis, - ).unstack() - result.columns = index - elif isinstance(v, ABCSeries): + return concat(values) + + if not all_indexed_same: + # GH 8467 + return self._concat_objects(keys, values, not_indexed_same=True) + + if self.axis == 0 and isinstance(v, ABCSeries): + # GH6124 if the list of Series have a consistent name, + # then propagate that name to the result. + index = v.index.copy() + if index.name is None: + # Only propagate the series name to the result + # if all series have a consistent name. If the + # series do not have a consistent name, do + # nothing. + names = {v.name for v in values} + if len(names) == 1: + index.name = list(names)[0] + + # normally use vstack as its faster than concat + # and if we have mi-columns + if ( + isinstance(v.index, MultiIndex) + or key_index is None + or isinstance(key_index, MultiIndex) + ): stacked_values = np.vstack([np.asarray(v) for v in values]) - result = DataFrame( - stacked_values.T, index=v.index, columns=key_index - ) + result = DataFrame(stacked_values, index=key_index, columns=index) else: - # GH#1738: values is list of arrays of unequal lengths - # fall through to the outer else clause - # TODO: sure this is right? we used to do this - # after raising AttributeError above - return Series(values, index=key_index, name=self._selection_name) - - # if we have date/time like in the original, then coerce dates - # as we are stacking can easily have object dtypes here - so = self._selected_obj - if so.ndim == 2 and so.dtypes.apply(needs_i8_conversion).any(): - result = _recast_datetimelike_result(result) - else: - result = result._convert(datetime=True) - - return self._reindex_output(result) - - # values are not series or array-like but scalars + # GH5788 instead of stacking; concat gets the + # dtypes correct + from pandas.core.reshape.concat import concat + + result = concat( + values, keys=key_index, names=key_index.names, axis=self.axis, + ).unstack() + result.columns = index + elif isinstance(v, ABCSeries): + stacked_values = np.vstack([np.asarray(v) for v in values]) + result = DataFrame(stacked_values.T, index=v.index, columns=key_index) else: - # only coerce dates if we find at least 1 datetime - should_coerce = any(isinstance(x, Timestamp) for x in values) - # self._selection_name not passed through to Series as the - # result should not take the name of original selection - # of columns - return Series(values, index=key_index)._convert( - datetime=True, coerce=should_coerce - ) + # GH#1738: values is list of arrays of unequal lengths + # fall through to the outer else clause + # TODO: sure this is right? we used to do this + # after raising AttributeError above + return Series(values, index=key_index, name=self._selection_name) + + # if we have date/time like in the original, then coerce dates + # as we are stacking can easily have object dtypes here + so = self._selected_obj + if so.ndim == 2 and so.dtypes.apply(needs_i8_conversion).any(): + result = _recast_datetimelike_result(result) + else: + result = result._convert(datetime=True) + + return self._reindex_output(result) + # values are not series or array-like but scalars else: - # Handle cases like BinGrouper - return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) + # only coerce dates if we find at least 1 datetime + should_coerce = any(isinstance(x, Timestamp) for x in values) + # self._selection_name not passed through to Series as the + # result should not take the name of original selection + # of columns + return Series(values, index=key_index)._convert( + datetime=True, coerce=should_coerce + ) def _transform_general(self, func, *args, **kwargs): from pandas.core.reshape.concat import concat