From dfd44651cc2a08c3047c83c1c35fca3ea9a4f17e Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Sat, 12 Sep 2015 20:27:34 -0400 Subject: [PATCH] BUG: Fixed bug in groupby.std changing target column when as_index=False (issue 10355) (+1 squashed commit) Squashed commits: [f222db1] BUG: Fixed bug in groupby.std changing target column when as_index=False (issue 10355) --- doc/source/whatsnew/v0.17.0.txt | 1 + pandas/core/groupby.py | 5 +++-- pandas/tests/test_groupby.py | 14 ++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 3b3bf8cffe41b..d03bbf7291ab8 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -1067,6 +1067,7 @@ Bug Fixes - Bug in ``algos.outer_join_indexer`` when ``right`` array is empty (:issue:`10618`) - Bug in ``filter`` (regression from 0.16.0) and ``transform`` when grouping on multiple keys, one of which is datetime-like (:issue:`10114`) +- Bug in ``groupby(as_index=False).std()`` also taking the square root of the grouping column(s) instead of leaving them unchanged (:issue:`10355`) - Bug in ``to_datetime`` and ``to_timedelta`` causing ``Index`` name to be lost (:issue:`10875`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index f34fd6e3d2575..c6717c16f3e0e 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -788,8 +788,9 @@ def std(self, ddof=1): For multiple groupings, the result index will be a MultiIndex """ - # todo, implement at cython level? 
- return np.sqrt(self.var(ddof=ddof)) + self._set_selection_from_grouper() + f = lambda x: np.sqrt(x.var(ddof=ddof)) + return self._python_agg_general(f) def var(self, ddof=1): """ diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 97b57690ccc49..83178970767a4 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -888,6 +888,20 @@ def _check_results(grouped): lambda x: x.weekday()]) _check_results(by_mwkday) + # issue 10355 + def test_std(self): + df = pd.DataFrame({ + 'a' : [1,1,1,2,2,2,3,3,3], + 'b' : [1,2,3,4,5,6,7,8,9], + }) + result = df.groupby('a',as_index=False).std() + expected = pd.DataFrame({ + 'a' : [1, 2, 3], + 'b' : [1, 1, 1] + }) + assert_frame_equal(result, expected) + + def test_aggregate_item_by_item(self): df = self.df.copy()