diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 2be7194af34b0..6ae67ab6eedb1 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -1021,7 +1021,7 @@ Performance Improvements - Development support for benchmarking with the `Air Speed Velocity library `_ (:issue:`8316`) - Added vbench benchmarks for alternative ExcelWriter engines and reading Excel files (:issue:`7171`) - Performance improvements in ``Categorical.value_counts`` (:issue:`10804`) -- Performance improvements in ``SeriesGroupBy.nunique`` and ``SeriesGroupBy.value_counts`` (:issue:`10820`, :issue:`11077`) +- Performance improvements in ``SeriesGroupBy.nunique``, ``SeriesGroupBy.value_counts`` and ``SeriesGroupBy.transform`` (:issue:`10820`, :issue:`11077`) - Performance improvements in ``DataFrame.drop_duplicates`` with integer dtypes (:issue:`10917`) - 4x improvement in ``timedelta`` string parsing (:issue:`6755`, :issue:`10426`) - 8x improvement in ``timedelta64`` and ``datetime64`` ops (:issue:`6755`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 8e44480c0c09b..35f5b7fadb9db 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2512,13 +2512,19 @@ def _transform_fast(self, func): if isinstance(func, compat.string_types): func = getattr(self,func) - values = func().values - counts = self.size().fillna(0).values - values = np.repeat(values, com._ensure_platform_int(counts)) - if any(counts == 0): - values = self._try_cast(values, self._selected_obj) + ids, _, ngroup = self.grouper.group_info + mask = ids != -1 + + out = func().values[ids] + if not mask.all(): + out = np.where(mask, out, np.nan) + + obs = np.zeros(ngroup, dtype='bool') + obs[ids[mask]] = True + if not obs.all(): + out = self._try_cast(out, self._selected_obj) - return self._set_result_index_ordered(Series(values)) + return Series(out, index=self.obj.index) def filter(self, func, dropna=True, *args, **kwargs): """