Skip to content

BUG: fix size()/count() when resampling empty series (#28427) #28459

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,7 @@ Groupby/resample/rolling

-
- Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`)
- Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning wrong dtype when used with an empty series or dataframe (:issue:`28427`)
- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
- Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue:`15584`).
- Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue:`19248`).
Expand Down
35 changes: 23 additions & 12 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -869,13 +869,32 @@ def var(self, ddof=1, *args, **kwargs):

@Appender(GroupBy.size.__doc__)
def size(self):
    # Notes are kept in comments rather than a docstring so that whatever the
    # decorator does with ``GroupBy.size.__doc__`` is left untouched.
    result = self._downsample("size")

    # Resampling an empty object (Series *or* DataFrame) must still yield an
    # empty int64 Series; the downsampled result cannot be relied on for the
    # dtype in that case. GH14962, GH28427
    if not len(self.ax):
        # Local import, as in the original block.
        from pandas import Series

        # Only a 1-dimensional selection (a Series) has a name to carry over;
        # a DataFrame selection produces an unnamed result.
        if self._selected_obj.ndim == 1:
            name = self._selected_obj.name
        else:
            name = None
        result = Series([], index=result.index, dtype="int64", name=name)
    return result

@Appender(GroupBy.count.__doc__)
def count(self):
    # Notes are kept in comments rather than a docstring so that whatever the
    # decorator does with ``GroupBy.count.__doc__`` is left untouched.
    result = self._downsample("count")

    # Non-empty axis: the downsampled counts are returned unchanged.
    if len(self.ax):
        return result

    # Empty axis: rebuild an empty int64 container of the matching shape.
    selected = self._selected_obj
    if selected.ndim == 1:
        # Same class as the selection, keeping its name.
        return type(selected)(
            [], index=result.index, dtype="int64", name=selected.name
        )

    # Local import, as in the original block.
    from pandas import DataFrame

    return DataFrame(
        [], index=result.index, columns=result.columns, dtype="int64"
    )

def quantile(self, q=0.5, **kwargs):
Expand Down Expand Up @@ -923,14 +942,6 @@ def g(self, _method=method, *args, **kwargs):
g.__doc__ = getattr(GroupBy, method).__doc__
setattr(Resampler, method, g)

# groupby & aggregate methods
# For each listed name, attach a thin wrapper to Resampler that forwards to
# ``self._downsample`` with that name.
# NOTE(review): an explicit ``count`` method with empty-input handling appears
# earlier in this diff. ``setattr`` below would replace it with this plain
# wrapper, so these lines look like the removed (pre-change) side of the hunk
# -- confirm they are not meant to survive alongside that definition.
for method in ["count"]:

# ``_method=method`` captures the current loop value as a default argument.
def h(self, _method=method):
return self._downsample(_method)

# Copy the docstring from the same-named GroupBy attribute, then install the
# wrapper on Resampler under that name.
h.__doc__ = getattr(GroupBy, method).__doc__
setattr(Resampler, method, h)

# series only methods
for method in ["nunique"]:
Expand Down
54 changes: 54 additions & 0 deletions pandas/tests/resample/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,22 @@ def test_resample_empty_series(freq, empty_series, resample_method):
tm.assert_series_equal(result, expected, check_dtype=False)


@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
@pytest.mark.parametrize("resample_method", ["count", "size"])
def test_resample_count_empty_series(freq, empty_series, resample_method):
    """count()/size() on a resampled empty series give an empty int64 series."""
    # GH28427
    resampler = empty_series.resample(freq)
    result = getattr(resampler, resample_method)()

    # The expected index is the input index converted to the target frequency.
    index = (
        empty_series.index.asfreq(freq=freq)
        if isinstance(empty_series.index, PeriodIndex)
        else empty_series.index._shallow_copy(freq=freq)
    )
    expected = pd.Series([], dtype="int64", index=index, name=empty_series.name)

    tm.assert_series_equal(result, expected)


@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_empty_dataframe(empty_frame, freq, resample_method):
Expand All @@ -136,6 +152,44 @@ def test_resample_empty_dataframe(empty_frame, freq, resample_method):
# test size for GH13212 (currently stays as df)


@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_count_empty_dataframe(freq, empty_frame):
    """count() on a resampled empty frame gives an empty int64 frame."""
    # GH28427

    # Work on a copy so the added column does not touch the fixture object.
    frame = empty_frame.copy()
    frame["a"] = []

    result = frame.resample(freq).count()

    # The expected index is the input index converted to the target frequency.
    index = (
        frame.index.asfreq(freq=freq)
        if isinstance(frame.index, PeriodIndex)
        else frame.index._shallow_copy(freq=freq)
    )
    expected = pd.DataFrame({"a": []}, dtype="int64", index=index)

    tm.assert_frame_equal(result, expected)


@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_size_empty_dataframe(freq, empty_frame):
    """size() on a resampled empty frame gives an empty, unnamed int64 series."""
    # GH28427

    # Work on a copy so the added column does not touch the fixture object.
    frame = empty_frame.copy()
    frame["a"] = []

    result = frame.resample(freq).size()

    # The expected index is the input index converted to the target frequency.
    index = (
        frame.index.asfreq(freq=freq)
        if isinstance(frame.index, PeriodIndex)
        else frame.index._shallow_copy(freq=freq)
    )
    expected = pd.Series([], dtype="int64", index=index)

    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
@pytest.mark.parametrize("dtype", [np.float, np.int, np.object, "datetime64[ns]"])
def test_resample_empty_dtypes(index, dtype, resample_method):
Expand Down