From a599e9d0c1504504bb44b52f993d66d3eb994af5 Mon Sep 17 00:00:00 2001
From: Richard
Date: Sun, 6 Sep 2020 08:24:38 -0400
Subject: [PATCH 1/2] CLN: _wrap_applied_output

---
 pandas/core/groupby/generic.py | 195 ++++++++++++++++-----------------
 1 file changed, 97 insertions(+), 98 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index b855ce65f41b2..2981f48f51e77 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1192,113 +1192,112 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
             return self.obj._constructor()
         elif isinstance(first_not_none, DataFrame):
             return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
-        else:
-            key_index = self.grouper.result_index if self.as_index else None
-
-            if isinstance(first_not_none, Series):
-                # this is to silence a DeprecationWarning
-                # TODO: Remove when default dtype of empty Series is object
-                kwargs = first_not_none._construct_axes_dict()
-                backup = create_series_with_explicit_dtype(
-                    dtype_if_empty=object, **kwargs
-                )
-                values = [x if (x is not None) else backup for x in values]
-
-            v = values[0]
-
-            if not isinstance(v, (np.ndarray, Index, Series)) and self.as_index:
-                # values are not series or array-like but scalars
-                # self._selection_name not passed through to Series as the
-                # result should not take the name of original selection
-                # of columns
-                return self.obj._constructor_sliced(values, index=key_index)
-
-            else:
-                if isinstance(v, Series):
-                    all_indexed_same = all_indexes_same((x.index for x in values))
-
-                    # GH3596
-                    # provide a reduction (Frame -> Series) if groups are
-                    # unique
-                    if self.squeeze:
-                        applied_index = self._selected_obj._get_axis(self.axis)
-                        singular_series = (
-                            len(values) == 1 and applied_index.nlevels == 1
-                        )
-
-                        # assign the name to this series
-                        if singular_series:
-                            values[0].name = keys[0]
-
-                            # GH2893
-                            # we have series in the values array, we want to
-                            # produce a series:
-                            # if any of the sub-series are not indexed the same
-                            # OR we don't have a multi-index and we have only a
-                            # single values
-                            return self._concat_objects(
-                                keys, values, not_indexed_same=not_indexed_same
-                            )
-
-                        # still a series
-                        # path added as of GH 5545
-                        elif all_indexed_same:
-                            from pandas.core.reshape.concat import concat
-
-                            return concat(values)
-
-                    if not all_indexed_same:
-                        # GH 8467
-                        return self._concat_objects(keys, values, not_indexed_same=True)
-
-                    # Combine values
-                    # vstack+constructor is faster than concat and handles MI-columns
-                    stacked_values = np.vstack([np.asarray(v) for v in values])
-
-                    if self.axis == 0:
-                        index = key_index
-                        columns = v.index.copy()
-                        if columns.name is None:
-                            # GH6124 - propagate name of Series when it's consistent
-                            names = {v.name for v in values}
-                            if len(names) == 1:
-                                columns.name = list(names)[0]
-                    else:
-                        index = v.index
-                        columns = key_index
-                        stacked_values = stacked_values.T
-
-                    result = self.obj._constructor(
-                        stacked_values, index=index, columns=columns
-                    )
-
-                elif not self.as_index:
-                    # We add grouping column below, so create a frame here
-                    result = DataFrame(
-                        values, index=key_index, columns=[self._selection]
-                    )
-                else:
-                    # GH#1738: values is list of arrays of unequal lengths
-                    # fall through to the outer else clause
-                    # TODO: sure this is right? we used to do this
-                    # after raising AttributeError above
-                    return self.obj._constructor_sliced(
-                        values, index=key_index, name=self._selection_name
-                    )
-
-                # if we have date/time like in the original, then coerce dates
-                # as we are stacking can easily have object dtypes here
-                so = self._selected_obj
-                if so.ndim == 2 and so.dtypes.apply(needs_i8_conversion).any():
-                    result = _recast_datetimelike_result(result)
-                else:
-                    result = result._convert(datetime=True)
-
-                if not self.as_index:
-                    self._insert_inaxis_grouper_inplace(result)
-
-                return self._reindex_output(result)
+        key_index = self.grouper.result_index if self.as_index else None
+
+        if isinstance(first_not_none, Series):
+            # this is to silence a DeprecationWarning
+            # TODO: Remove when default dtype of empty Series is object
+            kwargs = first_not_none._construct_axes_dict()
+            backup = create_series_with_explicit_dtype(
+                dtype_if_empty=object, **kwargs
+            )
+
+            values = [x if (x is not None) else backup for x in values]
+
+        v = values[0]
+
+        if not isinstance(v, (np.ndarray, Index, Series)) and self.as_index:
+            # values are not series or array-like but scalars
+            # self._selection_name not passed through to Series as the
+            # result should not take the name of original selection
+            # of columns
+            return self.obj._constructor_sliced(values, index=key_index)
+
+        if isinstance(v, Series):
+            all_indexed_same = all_indexes_same((x.index for x in values))
+
+            # GH3596
+            # provide a reduction (Frame -> Series) if groups are
+            # unique
+            if self.squeeze:
+                applied_index = self._selected_obj._get_axis(self.axis)
+                singular_series = (
+                    len(values) == 1 and applied_index.nlevels == 1
+                )
+
+                # assign the name to this series
+                if singular_series:
+                    values[0].name = keys[0]
+
+                    # GH2893
+                    # we have series in the values array, we want to
+                    # produce a series:
+                    # if any of the sub-series are not indexed the same
+                    # OR we don't have a multi-index and we have only a
+                    # single values
+                    return self._concat_objects(
+                        keys, values, not_indexed_same=not_indexed_same
+                    )
+
+                # still a series
+                # path added as of GH 5545
+                elif all_indexed_same:
+                    from pandas.core.reshape.concat import concat
+
+                    return concat(values)
+
+            if not all_indexed_same:
+                # GH 8467
+                return self._concat_objects(keys, values, not_indexed_same=True)
+
+            # Combine values
+            # vstack+constructor is faster than concat and handles MI-columns
+            stacked_values = np.vstack([np.asarray(v) for v in values])
+
+            if self.axis == 0:
+                index = key_index
+                columns = v.index.copy()
+                if columns.name is None:
+                    # GH6124 - propagate name of Series when it's consistent
+                    names = {v.name for v in values}
+                    if len(names) == 1:
+                        columns.name = list(names)[0]
+            else:
+                index = v.index
+                columns = key_index
+                stacked_values = stacked_values.T
+
+            result = self.obj._constructor(
+                stacked_values, index=index, columns=columns
+            )
+
+        elif not self.as_index:
+            # We add grouping column below, so create a frame here
+            result = DataFrame(
+                values, index=key_index, columns=[self._selection]
+            )
+        else:
+            # GH#1738: values is list of arrays of unequal lengths
+            # fall through to the outer else clause
+            # TODO: sure this is right? we used to do this
+            # after raising AttributeError above
+            return self.obj._constructor_sliced(
+                values, index=key_index, name=self._selection_name
+            )
+
+        # if we have date/time like in the original, then coerce dates
+        # as we are stacking can easily have object dtypes here
+        so = self._selected_obj
+        if so.ndim == 2 and so.dtypes.apply(needs_i8_conversion).any():
+            result = _recast_datetimelike_result(result)
+        else:
+            result = result._convert(datetime=True)
+
+        if not self.as_index:
+            self._insert_inaxis_grouper_inplace(result)
+
+        return self._reindex_output(result)
 
     def _transform_general(
         self, func, *args, engine="cython", engine_kwargs=None, **kwargs

From 1bfc13d7d05fc52af952f1ada9ed5115bfa9788c Mon Sep 17 00:00:00 2001
From: Richard
Date: Sun, 6 Sep 2020 08:32:09 -0400
Subject: [PATCH 2/2] black

---
 pandas/core/groupby/generic.py | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 2981f48f51e77..e2b847b442269 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1199,9 +1199,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
             # this is to silence a DeprecationWarning
             # TODO: Remove when default dtype of empty Series is object
             kwargs = first_not_none._construct_axes_dict()
-            backup = create_series_with_explicit_dtype(
-                dtype_if_empty=object, **kwargs
-            )
+            backup = create_series_with_explicit_dtype(dtype_if_empty=object, **kwargs)
 
             values = [x if (x is not None) else backup for x in values]
 
@@ -1222,9 +1220,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
             # unique
             if self.squeeze:
                 applied_index = self._selected_obj._get_axis(self.axis)
-                singular_series = (
-                    len(values) == 1 and applied_index.nlevels == 1
-                )
+                singular_series = len(values) == 1 and applied_index.nlevels == 1
 
                 # assign the name to this series
                 if singular_series:
@@ -1268,15 +1264,11 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                 columns = key_index
                 stacked_values = stacked_values.T
 
-            result = self.obj._constructor(
-                stacked_values, index=index, columns=columns
-            )
+            result = self.obj._constructor(stacked_values, index=index, columns=columns)
 
         elif not self.as_index:
             # We add grouping column below, so create a frame here
-            result = DataFrame(
-                values, index=key_index, columns=[self._selection]
-            )
+            result = DataFrame(values, index=key_index, columns=[self._selection])
         else:
             # GH#1738: values is list of arrays of unequal lengths
             # fall through to the outer else clause