diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 0027343a13b60..26d794045daa7 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -433,6 +433,7 @@ Groupby/resample/rolling - - Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`) +- Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning wrong dtype when used with an empty series or dataframe (:issue:`28427`) - Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`) - Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue:`15584`). - Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue:`19248`). diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 5bb0716728778..81ec4f45ec8e1 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -869,13 +869,32 @@ def var(self, ddof=1, *args, **kwargs): @Appender(GroupBy.size.__doc__) def size(self): - # It's a special case as higher level does return - # a copy of 0-len objects.
GH14962 result = self._downsample("size") - if not len(self.ax) and isinstance(self._selected_obj, ABCDataFrame): + if not len(self.ax): from pandas import Series - result = Series([], index=result.index, dtype="int64") + if self._selected_obj.ndim == 1: + name = self._selected_obj.name + else: + name = None + result = Series([], index=result.index, dtype="int64", name=name) + return result + + @Appender(GroupBy.count.__doc__) + def count(self): + result = self._downsample("count") + if not len(self.ax): + if self._selected_obj.ndim == 1: + result = self._selected_obj.__class__( + [], index=result.index, dtype="int64", name=self._selected_obj.name + ) + else: + from pandas import DataFrame + + result = DataFrame( + [], index=result.index, columns=result.columns, dtype="int64" + ) + return result def quantile(self, q=0.5, **kwargs): @@ -923,14 +942,6 @@ def g(self, _method=method, *args, **kwargs): g.__doc__ = getattr(GroupBy, method).__doc__ setattr(Resampler, method, g) -# groupby & aggregate methods -for method in ["count"]: - - def h(self, _method=method): - return self._downsample(_method) - - h.__doc__ = getattr(GroupBy, method).__doc__ - setattr(Resampler, method, h) # series only methods for method in ["nunique"]: diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index dc72800227c0e..161581e16b6fe 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -112,6 +112,22 @@ def test_resample_empty_series(freq, empty_series, resample_method): tm.assert_series_equal(result, expected, check_dtype=False) +@all_ts +@pytest.mark.parametrize("freq", ["M", "D", "H"]) +@pytest.mark.parametrize("resample_method", ["count", "size"]) +def test_resample_count_empty_series(freq, empty_series, resample_method): + # GH28427 + result = getattr(empty_series.resample(freq), resample_method)() + + if isinstance(empty_series.index, PeriodIndex): + index = empty_series.index.asfreq(freq=freq) + else: + index = 
empty_series.index._shallow_copy(freq=freq) + expected = pd.Series([], dtype="int64", index=index, name=empty_series.name) + + tm.assert_series_equal(result, expected) + + @all_ts @pytest.mark.parametrize("freq", ["M", "D", "H"]) def test_resample_empty_dataframe(empty_frame, freq, resample_method): @@ -136,6 +152,44 @@ def test_resample_empty_dataframe(empty_frame, freq, resample_method): # test size for GH13212 (currently stays as df) +@all_ts +@pytest.mark.parametrize("freq", ["M", "D", "H"]) +def test_resample_count_empty_dataframe(freq, empty_frame): + # GH28427 + + empty_frame = empty_frame.copy() + empty_frame["a"] = [] + + result = empty_frame.resample(freq).count() + + if isinstance(empty_frame.index, PeriodIndex): + index = empty_frame.index.asfreq(freq=freq) + else: + index = empty_frame.index._shallow_copy(freq=freq) + expected = pd.DataFrame({"a": []}, dtype="int64", index=index) + + tm.assert_frame_equal(result, expected) + + +@all_ts +@pytest.mark.parametrize("freq", ["M", "D", "H"]) +def test_resample_size_empty_dataframe(freq, empty_frame): + # GH28427 + + empty_frame = empty_frame.copy() + empty_frame["a"] = [] + + result = empty_frame.resample(freq).size() + + if isinstance(empty_frame.index, PeriodIndex): + index = empty_frame.index.asfreq(freq=freq) + else: + index = empty_frame.index._shallow_copy(freq=freq) + expected = pd.Series([], dtype="int64", index=index) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0)) @pytest.mark.parametrize("dtype", [np.float, np.int, np.object, "datetime64[ns]"]) def test_resample_empty_dtypes(index, dtype, resample_method):