From 705caa24d43b50937598ee6eef1bc3618e22e045 Mon Sep 17 00:00:00 2001 From: Richard Date: Mon, 21 Sep 2020 18:38:13 -0400 Subject: [PATCH 1/2] CLN: breakup _wrap_applied_output --- pandas/core/groupby/generic.py | 125 ++++++++++++++++++--------------- 1 file changed, 69 insertions(+), 56 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b9cc2c19c224b..87d8e94cd61f4 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1210,64 +1210,77 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): self._insert_inaxis_grouper_inplace(result) return result else: - # this is to silence a DeprecationWarning - # TODO: Remove when default dtype of empty Series is object - kwargs = first_not_none._construct_axes_dict() - backup = create_series_with_explicit_dtype(dtype_if_empty=object, **kwargs) - values = [x if (x is not None) else backup for x in values] - - all_indexed_same = all_indexes_same(x.index for x in values) - - # GH3596 - # provide a reduction (Frame -> Series) if groups are - # unique - if self.squeeze: - applied_index = self._selected_obj._get_axis(self.axis) - singular_series = len(values) == 1 and applied_index.nlevels == 1 - - # assign the name to this series - if singular_series: - values[0].name = keys[0] - - # GH2893 - # we have series in the values array, we want to - # produce a series: - # if any of the sub-series are not indexed the same - # OR we don't have a multi-index and we have only a - # single values - return self._concat_objects( - keys, values, not_indexed_same=not_indexed_same - ) + # values are Series + return self.wrap_applied_output_series( + keys, values, not_indexed_same, first_not_none, key_index + ) - # still a series - # path added as of GH 5545 - elif all_indexed_same: - from pandas.core.reshape.concat import concat - - return concat(values) - - if not all_indexed_same: - # GH 8467 - return self._concat_objects(keys, values, not_indexed_same=True) - - # Combine values - # vstack+constructor is faster than concat and handles MI-columns - stacked_values = np.vstack([np.asarray(v) for v in values]) - - if self.axis == 0: - index = key_index - columns = first_not_none.index.copy() - if columns.name is None: - # GH6124 - propagate name of Series when it's consistent - names = {v.name for v in values} - if len(names) == 1: - columns.name = list(names)[0] - else: - index = first_not_none.index - columns = key_index - stacked_values = stacked_values.T + def _wrap_applied_output_series( + self, + keys, + values: List[Series], + not_indexed_same: bool, + first_not_none, + key_index, + ) -> FrameOrSeriesUnion: + # this is to silence a DeprecationWarning + # TODO: Remove when default dtype of empty Series is object + kwargs = first_not_none._construct_axes_dict() + backup = create_series_with_explicit_dtype(dtype_if_empty=object, **kwargs) + values = [x if (x is not None) else backup for x in values] + + all_indexed_same = all_indexes_same(x.index for x in values) + + # GH3596 + # provide a reduction (Frame -> Series) if groups are + # unique + if self.squeeze: + applied_index = self._selected_obj._get_axis(self.axis) + singular_series = len(values) == 1 and applied_index.nlevels == 1 + + # assign the name to this series + if singular_series: + values[0].name = keys[0] + + # GH2893 + # we have series in the values array, we want to + # produce a series: + # if any of the sub-series are not indexed the same + # OR we don't have a multi-index and we have only a + # single values + return self._concat_objects( + keys, values, not_indexed_same=not_indexed_same + ) + + # still a series + # path added as of GH 5545 + elif all_indexed_same: + from pandas.core.reshape.concat import concat + + return concat(values) + + if not all_indexed_same: + # GH 8467 + return self._concat_objects(keys, values, not_indexed_same=True) + + # Combine values + # vstack+constructor is faster than concat and handles MI-columns + stacked_values = np.vstack([np.asarray(v) for v in values]) + + if self.axis == 0: + index = key_index + columns = first_not_none.index.copy() + if columns.name is None: + # GH6124 - propagate name of Series when it's consistent + names = {v.name for v in values} + if len(names) == 1: + columns.name = list(names)[0] + else: + index = first_not_none.index + columns = key_index + stacked_values = stacked_values.T - result = self.obj._constructor(stacked_values, index=index, columns=columns) + result = self.obj._constructor(stacked_values, index=index, columns=columns) # if we have date/time like in the original, then coerce dates # as we are stacking can easily have object dtypes here From 10fc160f85b41e341b523ad9931a7a2a16998e65 Mon Sep 17 00:00:00 2001 From: Richard Date: Mon, 21 Sep 2020 19:13:55 -0400 Subject: [PATCH 2/2] Missing underscore --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 87d8e94cd61f4..29f13107f750a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1211,7 +1211,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): return result else: # values are Series - return self.wrap_applied_output_series( + return self._wrap_applied_output_series( keys, values, not_indexed_same, first_not_none, key_index )