From 1a3b4aafaeeb63c560ce3cd8aa248b88c77c8434 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Mon, 23 Jan 2017 17:45:00 -0500 Subject: [PATCH 1/2] BUG: GH14233 resample().median() failed if duplicate column names were present --- doc/source/whatsnew/v0.20.0.txt | 3 +++ pandas/core/groupby.py | 1 - pandas/tseries/tests/test_resample.py | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index cc3bf696ee4c7..be2be9484adc5 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -464,3 +464,6 @@ Bug Fixes - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) + +- Bug in ``resample().median()`` if duplicate column names were present (:issue:`14233`) + diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 3bbf248ece1d3..81f85d0ed623b 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2203,7 +2203,6 @@ def agg_series(self, obj, func): # cython aggregation _cython_functions = copy.deepcopy(BaseGrouper._cython_functions) - _cython_functions['aggregate'].pop('median') class Grouping(object): diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index fbf0e0095a2f9..94b4e09db2f59 100755 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -2931,6 +2931,21 @@ def test_consistency_with_window(self): self.assertEqual(result.index.nlevels, 2) tm.assert_index_equal(result.index.levels[0], expected) + def test_median_duplicate_columns(self): + # GH 14233 + + df = pd.DataFrame(np.array([[i + j for i in range(20)] + for j in [0, 100, 1000]]) + .T, columns=list('aaa'), + index=pd.date_range('2012-01-01', + periods=20, freq='s')) + df2 = df.copy() + df2.columns = ['a', 'b', 'c'] + expected = df2.resample('5s').median() + result = df.resample('5s').median() + expected.columns = result.columns + assert_frame_equal(result, expected) + class TestTimeGrouper(tm.TestCase): def setUp(self): From 6e0d90012f5fe6ee1357de6b80c78cc06dd624bf Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Tue, 24 Jan 2017 09:54:44 -0500 Subject: [PATCH 2/2] Use randn in test --- pandas/tseries/tests/test_resample.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 94b4e09db2f59..56953541265a6 100755 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -2934,9 +2934,8 @@ def test_consistency_with_window(self): def test_median_duplicate_columns(self): # GH 14233 - df = pd.DataFrame(np.array([[i + j for i in range(20)] - for j in [0, 100, 1000]]) - .T, columns=list('aaa'), + df = pd.DataFrame(np.random.randn(20, 3), + columns=list('aaa'), index=pd.date_range('2012-01-01', periods=20, freq='s')) df2 = df.copy()