Skip to content

Commit e4de858

Browse files
author
Gabriel Corona
committed
BUG: fix dtype for .resample().size()/count() of empty series/dataframe (#28427)
1 parent d134b47 commit e4de858

File tree

3 files changed

+73
-12
lines changed

3 files changed

+73
-12
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,7 @@ Groupby/resample/rolling
433433

434434
-
435435
- Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`)
436+
- Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning wrong dtype when used with an empty series or dataframe (:issue:`28427`)
436437
- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
437438
- Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue:`15584`).
438439
- Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue:`19248`).

pandas/core/resample.py

+23-12
Original file line numberDiff line numberDiff line change
@@ -869,13 +869,32 @@ def var(self, ddof=1, *args, **kwargs):
869869

870870
@Appender(GroupBy.size.__doc__)
871871
def size(self):
872-
# It's a special case as higher level does return
873-
# a copy of 0-len objects. GH14962
874872
result = self._downsample("size")
875-
if not len(self.ax) and isinstance(self._selected_obj, ABCDataFrame):
873+
if not len(self.ax):
876874
from pandas import Series
877875

878-
result = Series([], index=result.index, dtype="int64")
876+
if self._selected_obj.ndim == 1:
877+
name = self._selected_obj.name
878+
else:
879+
name = None
880+
result = Series([], index=result.index, dtype="int64", name=name)
881+
return result
882+
883+
@Appender(GroupBy.count.__doc__)
884+
def count(self):
885+
result = self._downsample("count")
886+
if not len(self.ax):
887+
if self._selected_obj.ndim == 1:
888+
result = self._selected_obj.__class__(
889+
[], index=result.index, dtype="int64", name=self._selected_obj.name
890+
)
891+
else:
892+
from pandas import DataFrame
893+
894+
result = DataFrame(
895+
[], index=result.index, columns=result.columns, dtype="int64"
896+
)
897+
879898
return result
880899

881900
def quantile(self, q=0.5, **kwargs):
@@ -923,14 +942,6 @@ def g(self, _method=method, *args, **kwargs):
923942
g.__doc__ = getattr(GroupBy, method).__doc__
924943
setattr(Resampler, method, g)
925944

926-
# groupby & aggregate methods
927-
for method in ["count"]:
928-
929-
def h(self, _method=method):
930-
return self._downsample(_method)
931-
932-
h.__doc__ = getattr(GroupBy, method).__doc__
933-
setattr(Resampler, method, h)
934945

935946
# series only methods
936947
for method in ["nunique"]:

pandas/tests/resample/test_base.py

+49
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,22 @@ def test_resample_empty_series(freq, empty_series, resample_method):
112112
tm.assert_series_equal(result, expected, check_dtype=False)
113113

114114

115+
@all_ts
116+
@pytest.mark.parametrize("freq", ["M", "D", "H"])
117+
@pytest.mark.parametrize("resample_method", ["count", "size"])
118+
def test_resample_count_empty_series(freq, empty_series, resample_method):
119+
# GH28427
120+
result = getattr(empty_series.resample(freq), resample_method)()
121+
122+
if isinstance(empty_series.index, PeriodIndex):
123+
index = empty_series.index.asfreq(freq=freq)
124+
else:
125+
index = empty_series.index._shallow_copy(freq=freq)
126+
expected = pd.Series([], dtype="int64", index=index, name=empty_series.name)
127+
128+
tm.assert_series_equal(result, expected)
129+
130+
115131
@all_ts
116132
@pytest.mark.parametrize("freq", ["M", "D", "H"])
117133
def test_resample_empty_dataframe(empty_frame, freq, resample_method):
@@ -136,6 +152,39 @@ def test_resample_empty_dataframe(empty_frame, freq, resample_method):
136152
# test size for GH13212 (currently stays as df)
137153

138154

155+
@all_ts
156+
@pytest.mark.parametrize("freq", ["M", "D", "H"])
157+
def test_resample_count_empty_dataframe(freq, empty_frame):
158+
# GH28427
159+
160+
empty_frame = empty_frame.copy()
161+
empty_frame["a"] = []
162+
163+
result = empty_frame.resample(freq).count()
164+
165+
if isinstance(empty_frame.index, PeriodIndex):
166+
index = empty_frame.index.asfreq(freq=freq)
167+
else:
168+
index = empty_frame.index._shallow_copy(freq=freq)
169+
expected = pd.DataFrame({"a": []}, dtype="int64", index=index)
170+
171+
tm.assert_frame_equal(result, expected)
172+
173+
174+
@pytest.mark.parametrize("freq", ["M", "D", "H"])
175+
def test_resample_size_empty_dataframe(freq):
176+
# GH28427
177+
178+
empty_dataframe = pd.DataFrame(
179+
{"a": []}, dtype="datetime64[ns]", index=pd.DatetimeIndex([])
180+
)
181+
result = empty_dataframe.resample(freq).size()
182+
183+
expected = pd.Series([], dtype="int64", index=pd.DatetimeIndex([], freq=freq))
184+
185+
tm.assert_series_equal(result, expected)
186+
187+
139188
@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
140189
@pytest.mark.parametrize("dtype", [np.float, np.int, np.object, "datetime64[ns]"])
141190
def test_resample_empty_dtypes(index, dtype, resample_method):

0 commit comments

Comments
 (0)