From 95d2008e68565a1a3fc725e212a54d6755fda200 Mon Sep 17 00:00:00 2001 From: discort Date: Mon, 9 Jan 2017 23:53:17 +0200 Subject: [PATCH 1/6] BUG #14962 --- pandas/core/resample.py | 4 ++++ pandas/tests/test_resample.py | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 2bb825541e23b..d794e033fd60e 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -697,6 +697,10 @@ def _downsample(self, how, **kwargs): if not len(ax): # reset to the new freq obj = obj.copy() + if how == "size" and isinstance(obj, pd.DataFrame): + obj = obj.groupby( + self.grouper, axis=self.axis).aggregate(how, **kwargs) + obj.index.freq = self.freq return obj diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 959e3d2f459ce..4a35af262a76c 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -848,6 +848,14 @@ def test_resample_loffset_arg_type(self): assert_frame_equal(result_agg, expected) assert_frame_equal(result_how, expected) + def test_resample_empty_dataframe_with_size(self): + # GH 14962 + df1 = pd.DataFrame(dict(a=range(100)), + index=pd.date_range('1/1/2000', periods=100, freq="M")) + df2 = df1[df1.a < 0] + result = df2.resample("Q").size() + assertIsInstance(result, pd.Series) + class TestDatetimeIndex(Base): _index_factory = lambda x: date_range From 8f7b43734e15839349fd4412e4bcb5e1458bb9a3 Mon Sep 17 00:00:00 2001 From: discort Date: Thu, 12 Jan 2017 00:18:26 +0200 Subject: [PATCH 2/6] BUG: added series type to wrap_result for empty DataFrame --- doc/source/whatsnew/v0.19.0.txt | 1 + pandas/core/resample.py | 15 ++++++++++----- pandas/tests/test_resample.py | 30 +++++++++++++++++++++--------- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index bc5e278df743f..00467e3608d54 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1563,3 +1563,4 @@ Bug Fixes - ``PeriodIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`) - Bug in ``df.groupby`` where ``.median()`` returns arbitrary values if grouped dataframe contains empty bins (:issue:`13629`) - Bug in ``Index.copy()`` where ``name`` parameter was ignored (:issue:`14302`) +- Bug in ``_downsample()``. Inconsistent return type on resample of empty DataFrame (:issue:`14962`) \ No newline at end of file diff --git a/pandas/core/resample.py b/pandas/core/resample.py index d794e033fd60e..df888a97e4949 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -697,12 +697,8 @@ def _downsample(self, how, **kwargs): if not len(ax): # reset to the new freq obj = obj.copy() - if how == "size" and isinstance(obj, pd.DataFrame): - obj = obj.groupby( - self.grouper, axis=self.axis).aggregate(how, **kwargs) - obj.index.freq = self.freq - return obj + return self._wrap_result(obj) # do we have a regular frequency if ax.freq is not None or ax.inferred_freq is not None: @@ -773,6 +769,15 @@ def _wrap_result(self, result): # convert if needed if self.kind == 'period' and not isinstance(result.index, PeriodIndex): result.index = result.index.to_period(self.freq) + + # Make consistent type of result. GH14962 + if not len(self.ax): + grouper = BinGrouper([], result.index) + grouped = self._selected_obj.groupby(grouper) + result = pd.Series([], + index=result.index, + name=grouped.name, + dtype='int64') return result diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 4a35af262a76c..afcef2160f4dd 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -773,8 +773,18 @@ def test_resample_empty_series(self): expected = s.copy() expected.index = s.index._shallow_copy(freq=freq) assert_index_equal(result.index, expected.index) - assert result.index.freq == expected.index.freq - assert_series_equal(result, expected, check_dtype=False) + + self.assertEqual(result.index.freq, expected.index.freq) + + if (method == 'size' and + isinstance(result.index, PeriodIndex) and + freq in ['M', 'D']): + # GH12871 - TODO: name should propagate, but currently + # doesn't on lower / same frequency with PeriodIndex + assert_series_equal(result, expected, check_dtype=False) + + else: + assert_series_equal(result, expected, check_dtype=False) def test_resample_empty_dataframe(self): # GH13212 @@ -783,11 +793,11 @@ def test_resample_empty_dataframe(self): for freq in ['M', 'D', 'H']: # count retains dimensions too - methods = downsample_methods + ['count'] + methods = downsample_methods + upsample_methods for method in methods: result = getattr(f.resample(freq), method)() - expected = f.copy() + expected = pd.Series([]) expected.index = f.index._shallow_copy(freq=freq) assert_index_equal(result.index, expected.index) assert result.index.freq == expected.index.freq @@ -850,11 +860,13 @@ def test_resample_loffset_arg_type(self): def test_resample_empty_dataframe_with_size(self): # GH 14962 - df1 = pd.DataFrame(dict(a=range(100)), - index=pd.date_range('1/1/2000', periods=100, freq="M")) - df2 = df1[df1.a < 0] - result = df2.resample("Q").size() - assertIsInstance(result, pd.Series) + index = pd.DatetimeIndex([], freq='M') + df = pd.DataFrame([], index=index) + + for freq in ['M', 'D', 'H']: + result = df.resample(freq).size() + expected = pd.Series([], index=index, dtype='int64') + assert_series_equal(result, expected) class TestDatetimeIndex(Base): From 37ba820ab2b59e258a9412cb7c0cf8d9723bd088 Mon Sep 17 00:00:00 2001 From: discort Date: Wed, 25 Jan 2017 09:49:41 +0200 Subject: [PATCH 3/6] added explicit 'size' method and defined logic there --- doc/source/whatsnew/v0.19.0.txt | 1 - doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/resample.py | 20 +++++++++----------- pandas/tests/test_resample.py | 21 +++++++++------------ 4 files changed, 19 insertions(+), 24 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 00467e3608d54..bc5e278df743f 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1563,4 +1563,3 @@ Bug Fixes - ``PeriodIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`) - Bug in ``df.groupby`` where ``.median()`` returns arbitrary values if grouped dataframe contains empty bins (:issue:`13629`) - Bug in ``Index.copy()`` where ``name`` parameter was ignored (:issue:`14302`) -- Bug in ``_downsample()``. Inconsistent return type on resample of empty DataFrame (:issue:`14962`) \ No newline at end of file diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9d475390175b2..cbb256321fc0a 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1685,6 +1685,7 @@ Groupby/Resample/Rolling - Bug in ``.rolling()`` where ``pd.Timedelta`` or ``datetime.timedelta`` was not accepted as a ``window`` argument (:issue:`15440`) - Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) - Bug in ``DataFrame.resample().median()`` if duplicate column names are present (:issue:`14233`) +- Bug in ``resample().size()``. Inconsistent return type on resample of empty DataFrame (:issue:`14962`) Sparse ^^^^^^ diff --git a/pandas/core/resample.py b/pandas/core/resample.py index df888a97e4949..eaa798f0ca225 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -17,6 +17,7 @@ from pandas.core.indexes.period import PeriodIndex, period_range import pandas.core.common as com import pandas.core.algorithms as algos +from pandas.types.generic import ABCDataFrame import pandas.compat as compat from pandas.compat.numpy import function as nv @@ -549,7 +550,13 @@ def var(self, ddof=1, *args, **kwargs): nv.validate_resampler_func('var', args, kwargs) return self._downsample('var', ddof=ddof) - + @Appender(GroupBy.size.__doc__) + def size(self): + # It 'seems' special and needs extra handling. GH14962 + result = self._downsample('size') + if not len(self.ax) and isinstance(self._selected_obj, ABCDataFrame): + result = pd.Series([], index=result.index, dtype='int64') + return result Resampler._deprecated_valids += dir(Resampler) # downsample methods @@ -563,8 +570,7 @@ def f(self, _method=method, *args, **kwargs): setattr(Resampler, method, f) # groupby & aggregate methods -for method in ['count', 'size']: - +for method in ['count']: def f(self, _method=method): return self._downsample(_method) f.__doc__ = getattr(GroupBy, method).__doc__ @@ -770,14 +776,6 @@ def _wrap_result(self, result): if self.kind == 'period' and not isinstance(result.index, PeriodIndex): result.index = result.index.to_period(self.freq) - # Make consistent type of result. GH14962 - if not len(self.ax): - grouper = BinGrouper([], result.index) - grouped = self._selected_obj.groupby(grouper) - result = pd.Series([], - index=result.index, - name=grouped.name, - dtype='int64') return result diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index afcef2160f4dd..1a019d4c3615a 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -796,8 +796,14 @@ def test_resample_empty_dataframe(self): methods = downsample_methods + upsample_methods for method in methods: result = getattr(f.resample(freq), method)() - - expected = pd.Series([]) + if method != 'size': + expected = f.copy() + assert_equal = assert_frame_equal + else: + # GH14962 + expected = Series([]) + assert_equal = assert_series_equal + expected.index = f.index._shallow_copy(freq=freq) assert_index_equal(result.index, expected.index) assert result.index.freq == expected.index.freq @@ -805,6 +811,7 @@ def test_resample_empty_dataframe(self): # test size for GH13212 (currently stays as df) + def test_resample_empty_dtypes(self): # Empty series were sometimes causing a segfault (for the functions @@ -858,16 +865,6 @@ def test_resample_loffset_arg_type(self): assert_frame_equal(result_agg, expected) assert_frame_equal(result_how, expected) - def test_resample_empty_dataframe_with_size(self): - # GH 14962 - index = pd.DatetimeIndex([], freq='M') - df = pd.DataFrame([], index=index) - - for freq in ['M', 'D', 'H']: - result = df.resample(freq).size() - expected = pd.Series([], index=index, dtype='int64') - assert_series_equal(result, expected) - class TestDatetimeIndex(Base): _index_factory = lambda x: date_range From 43b26139d330157baf296080b9c0f5dea67db4ed Mon Sep 17 00:00:00 2001 From: discort Date: Wed, 5 Apr 2017 11:35:59 +0300 Subject: [PATCH 4/6] fixed lint warnings --- pandas/core/resample.py | 2 ++ pandas/tests/test_resample.py | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index eaa798f0ca225..8f137d6038830 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -557,6 +557,8 @@ def size(self): if not len(self.ax) and isinstance(self._selected_obj, ABCDataFrame): result = pd.Series([], index=result.index, dtype='int64') return result + + Resampler._deprecated_valids += dir(Resampler) # downsample methods diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 1a019d4c3615a..2eee5f4beca45 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -803,7 +803,7 @@ def test_resample_empty_dataframe(self): # GH14962 expected = Series([]) assert_equal = assert_series_equal - + expected.index = f.index._shallow_copy(freq=freq) assert_index_equal(result.index, expected.index) assert result.index.freq == expected.index.freq @@ -811,7 +811,6 @@ def test_resample_empty_dataframe(self): # test size for GH13212 (currently stays as df) - def test_resample_empty_dtypes(self): # Empty series were sometimes causing a segfault (for the functions From c0e06b8eb3dba26adaec8bc447e0af6f1c4c351a Mon Sep 17 00:00:00 2001 From: discort Date: Mon, 12 Jun 2017 23:08:19 +0300 Subject: [PATCH 5/6] Updated tests for resampling of empty dataframes --- doc/source/whatsnew/v0.20.0.txt | 1 - doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/resample.py | 5 ++--- pandas/tests/test_resample.py | 20 +++++--------------- 4 files changed, 8 insertions(+), 19 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index cbb256321fc0a..9d475390175b2 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1685,7 +1685,6 @@ Groupby/Resample/Rolling - Bug in ``.rolling()`` where ``pd.Timedelta`` or ``datetime.timedelta`` was not accepted as a ``window`` argument (:issue:`15440`) - Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) - Bug in ``DataFrame.resample().median()`` if duplicate column names are present (:issue:`14233`) -- Bug in ``resample().size()``. Inconsistent return type on resample of empty DataFrame (:issue:`14962`) Sparse ^^^^^^ diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 36ca79e8b8714..c9f84b37ad874 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -112,6 +112,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ +- Bug in ``resample().size()``. Inconsistent return type on resample of empty DataFrame (:issue:`14962`) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 8f137d6038830..f5c615f2b4db9 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -17,7 +17,7 @@ from pandas.core.indexes.period import PeriodIndex, period_range import pandas.core.common as com import pandas.core.algorithms as algos -from pandas.types.generic import ABCDataFrame +from pandas.core.dtypes.generic import ABCDataFrame import pandas.compat as compat from pandas.compat.numpy import function as nv @@ -706,7 +706,7 @@ def _downsample(self, how, **kwargs): # reset to the new freq obj = obj.copy() obj.index.freq = self.freq - return self._wrap_result(obj) + return obj # do we have a regular frequency if ax.freq is not None or ax.inferred_freq is not None: @@ -777,7 +777,6 @@ def _wrap_result(self, result): # convert if needed if self.kind == 'period' and not isinstance(result.index, PeriodIndex): result.index = result.index.to_period(self.freq) - return result diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 2eee5f4beca45..6a3b21df0f511 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -773,18 +773,8 @@ def test_resample_empty_series(self): expected = s.copy() expected.index = s.index._shallow_copy(freq=freq) assert_index_equal(result.index, expected.index) - - self.assertEqual(result.index.freq, expected.index.freq) - - if (method == 'size' and - isinstance(result.index, PeriodIndex) and - freq in ['M', 'D']): - # GH12871 - TODO: name should propagate, but currently - # doesn't on lower / same frequency with PeriodIndex - assert_series_equal(result, expected, check_dtype=False) - - else: - assert_series_equal(result, expected, check_dtype=False) + assert result.index.freq == expected.index.freq + assert_series_equal(result, expected, check_dtype=False) def test_resample_empty_dataframe(self): # GH13212 @@ -798,16 +788,16 @@ def test_resample_empty_dataframe(self): result = getattr(f.resample(freq), method)() if method != 'size': expected = f.copy() - assert_equal = assert_frame_equal + assert_type_equal = assert_frame_equal else: # GH14962 expected = Series([]) - assert_equal = assert_series_equal + assert_type_equal = assert_series_equal expected.index = f.index._shallow_copy(freq=freq) assert_index_equal(result.index, expected.index) assert result.index.freq == expected.index.freq - assert_frame_equal(result, expected, check_dtype=False) + assert_type_equal(result, expected, check_dtype=False) # test size for GH13212 (currently stays as df) From 44f52be242c797db498e38e35ea74945a1d598c3 Mon Sep 17 00:00:00 2001 From: discort Date: Tue, 13 Jun 2017 15:48:22 +0300 Subject: [PATCH 6/6] Fixed Jeff's comments --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/resample.py | 3 ++- pandas/tests/test_resample.py | 4 +--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index c9f84b37ad874..56275cda80e57 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -112,7 +112,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug in ``resample().size()``. Inconsistent return type on resample of empty DataFrame (:issue:`14962`) +- Bug in ``DataFrame.resample().size()`` where an empty DataFrame did not return a Series (:issue:`14962`) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index f5c615f2b4db9..a8a48624fb885 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -552,7 +552,8 @@ def var(self, ddof=1, *args, **kwargs): @Appender(GroupBy.size.__doc__) def size(self): - # It 'seems' special and needs extra handling. GH14962 + # It's a special case as higher level does return + # a copy of 0-len objects. GH14962 result = self._downsample('size') if not len(self.ax) and isinstance(self._selected_obj, ABCDataFrame): result = pd.Series([], index=result.index, dtype='int64') diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 6a3b21df0f511..15bbd7a9ef5e9 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -788,16 +788,14 @@ def test_resample_empty_dataframe(self): result = getattr(f.resample(freq), method)() if method != 'size': expected = f.copy() - assert_type_equal = assert_frame_equal else: # GH14962 expected = Series([]) - assert_type_equal = assert_series_equal expected.index = f.index._shallow_copy(freq=freq) assert_index_equal(result.index, expected.index) assert result.index.freq == expected.index.freq - assert_type_equal(result, expected, check_dtype=False) + assert_almost_equal(result, expected, check_dtype=False) # test size for GH13212 (currently stays as df)