Skip to content

Commit a928d03

Browse files
author
Gabriel Corona
committed
BUG: fix dtype for .resample().size()/count() of empty series/dataframe (pandas-dev#28427)
1 parent de67bb7 commit a928d03

File tree

3 files changed

+68
-12
lines changed

3 files changed

+68
-12
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,7 @@ Groupby/resample/rolling
335335
^^^^^^^^^^^^^^^^^^^^^^^^
336336

337337
-
338+
- Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning the wrong dtype when used with an empty :class:`Series` or :class:`DataFrame` (:issue:`28427`)
338339
- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
339340
- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)
340341
- Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`)

pandas/core/resample.py

+24-12
Original file line numberDiff line numberDiff line change
@@ -853,13 +853,33 @@ def var(self, ddof=1, *args, **kwargs):
853853

854854
@Appender(GroupBy.size.__doc__)
855855
def size(self):
856-
# It's a special case as higher level does return
857-
# a copy of 0-len objects. GH14962
858856
result = self._downsample("size")
859-
if not len(self.ax) and isinstance(self._selected_obj, ABCDataFrame):
857+
if not len(self.ax):
860858
from pandas import Series
861859

862-
result = Series([], index=result.index, dtype="int64")
860+
if self._selected_obj.ndim == 1:
861+
name = self._selected_obj.name
862+
result = self._selected_obj.__class__(
863+
[], index=result.index, dtype="int64", name=name
864+
)
865+
else:
866+
result = Series([], index=result.index, dtype="int64")
867+
return result
868+
869+
@Appender(GroupBy.count.__doc__)
870+
def count(self):
871+
result = self._downsample("count")
872+
klass = self._selected_obj.__class__
873+
if not len(self.ax):
874+
if self._selected_obj.ndim == 1:
875+
result = klass(
876+
[], index=result.index, dtype="int64", name=self._selected_obj.name
877+
)
878+
else:
879+
result = klass(
880+
[], index=result.index, dtype="int64", columns=result.columns
881+
)
882+
863883
return result
864884

865885
def quantile(self, q=0.5, **kwargs):
@@ -907,14 +927,6 @@ def g(self, _method=method, *args, **kwargs):
907927
g.__doc__ = getattr(GroupBy, method).__doc__
908928
setattr(Resampler, method, g)
909929

910-
# groupby & aggregate methods
911-
for method in ["count"]:
912-
913-
def h(self, _method=method):
914-
return self._downsample(_method)
915-
916-
h.__doc__ = getattr(GroupBy, method).__doc__
917-
setattr(Resampler, method, h)
918930

919931
# series only methods
920932
for method in ["nunique"]:

pandas/tests/resample/test_base.py

+43
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,19 @@ def test_resample_empty_series(freq, empty_series, resample_method):
118118
assert_series_equal(result, expected, check_dtype=False)
119119

120120

121+
@pytest.mark.parametrize("freq", ["M", "D", "H"])
122+
@pytest.mark.parametrize("resample_method", ["count", "size"])
123+
def test_resample_count_empty_series(freq, resample_method):
124+
# GH28427
125+
126+
empty_series = pd.Series([], dtype="datetime64[ns]", index=pd.DatetimeIndex([]))
127+
result = getattr(empty_series.resample(freq), resample_method)()
128+
129+
expected = pd.Series([], dtype="int64", index=pd.DatetimeIndex([], freq=freq))
130+
131+
assert_series_equal(result, expected)
132+
133+
121134
@all_ts
122135
@pytest.mark.parametrize("freq", ["M", "D", "H"])
123136
def test_resample_empty_dataframe(empty_frame, freq, resample_method):
@@ -142,6 +155,36 @@ def test_resample_empty_dataframe(empty_frame, freq, resample_method):
142155
# test size for GH13212 (currently stays as df)
143156

144157

158+
@pytest.mark.parametrize("freq", ["M", "D", "H"])
159+
def test_resample_count_empty_dataframe(freq):
160+
# GH28427
161+
162+
empty_dataframe = pd.DataFrame(
163+
{"a": []}, dtype="datetime64[ns]", index=pd.DatetimeIndex([])
164+
)
165+
result = empty_dataframe.resample(freq).count()
166+
167+
expected = pd.DataFrame(
168+
{"a": []}, dtype="int64", index=pd.DatetimeIndex([], freq=freq)
169+
)
170+
171+
assert_frame_equal(result, expected)
172+
173+
174+
@pytest.mark.parametrize("freq", ["M", "D", "H"])
175+
def test_resample_size_empty_dataframe(freq):
176+
# GH28427
177+
178+
empty_dataframe = pd.DataFrame(
179+
{"a": []}, dtype="datetime64[ns]", index=pd.DatetimeIndex([])
180+
)
181+
result = empty_dataframe.resample(freq).size()
182+
183+
expected = pd.Series([], dtype="int64", index=pd.DatetimeIndex([], freq=freq))
184+
185+
assert_series_equal(result, expected)
186+
187+
145188
@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
146189
@pytest.mark.parametrize("dtype", [np.float, np.int, np.object, "datetime64[ns]"])
147190
def test_resample_empty_dtypes(index, dtype, resample_method):

0 commit comments

Comments
 (0)