Skip to content

Commit 202cd4e

Browse files
committed
added explicit 'size' method and defined logic there
1 parent 16d76b9 commit 202cd4e

File tree

4 files changed

+20
-29
lines changed

4 files changed

+20
-29
lines changed

doc/source/whatsnew/v0.19.0.txt

-1
Original file line numberDiff line numberDiff line change
@@ -1563,4 +1563,3 @@ Bug Fixes
15631563
- ``PeriodIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`)
15641564
- Bug in ``df.groupby`` where ``.median()`` returns arbitrary values if grouped dataframe contains empty bins (:issue:`13629`)
15651565
- Bug in ``Index.copy()`` where ``name`` parameter was ignored (:issue:`14302`)
1566-
- Bug in ``_downsample()``. Inconsistent return type on resample of empty DataFrame (:issue:`14962`)

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1074,6 +1074,7 @@ Groupby/Resample/Rolling
10741074
- Bug in ``.rolling()`` where ``pd.Timedelta`` or ``datetime.timedelta`` was not accepted as a ``window`` argument (:issue:`15440`)
10751075
- Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`)
10761076
- Bug in ``DataFrame.resample().median()`` if duplicate column names are present (:issue:`14233`)
1077+
- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`)
10771078

10781079
Sparse
10791080
^^^^^^

pandas/tests/tseries/test_resample.py

+10-17
Original file line numberDiff line numberDiff line change
@@ -757,15 +757,11 @@ def test_resample_empty_series(self):
757757
freq in ['M', 'D']):
758758
# GH12871 - TODO: name should propagate, but currently
759759
# doesn't on lower / same frequency with PeriodIndex
760-
<<<<<<< HEAD
761-
assert_series_equal(result, expected, check_dtype=False)
762760

763-
=======
764761
assert_series_equal(result, expected, check_dtype=False,
765762
check_names=False)
766763
# this assert will break when fixed
767764
# self.assertTrue(result.name is None)
768-
>>>>>>> BUG: added series type to wrap_result for empty DataFrame
769765
else:
770766
assert_series_equal(result, expected, check_dtype=False)
771767

@@ -779,15 +775,22 @@ def test_resample_empty_dataframe(self):
779775
methods = downsample_methods + upsample_methods
780776
for method in methods:
781777
result = getattr(f.resample(freq), method)()
782-
783-
expected = pd.Series([])
778+
if method != 'size':
779+
expected = f.copy()
780+
assert_equal = assert_frame_equal
781+
else:
782+
# GH14962
783+
expected = Series([])
784+
assert_equal = assert_series_equal
785+
784786
expected.index = f.index._shallow_copy(freq=freq)
785787
assert_index_equal(result.index, expected.index)
786788
self.assertEqual(result.index.freq, expected.index.freq)
787-
assert_series_equal(result, expected, check_dtype=False)
789+
assert_equal(result, expected, check_dtype=False)
788790

789791
# test size for GH13212 (currently stays as df)
790792

793+
791794
def test_resample_empty_dtypes(self):
792795

793796
# Empty series were sometimes causing a segfault (for the functions
@@ -841,16 +844,6 @@ def test_resample_loffset_arg_type(self):
841844
assert_frame_equal(result_agg, expected)
842845
assert_frame_equal(result_how, expected)
843846

844-
def test_resample_empty_dataframe_with_size(self):
845-
# GH 14962
846-
index = pd.DatetimeIndex([], freq='M')
847-
df = pd.DataFrame([], index=index)
848-
849-
for freq in ['M', 'D', 'H']:
850-
result = df.resample(freq).size()
851-
expected = pd.Series([], index=index, dtype='int64')
852-
assert_series_equal(result, expected)
853-
854847

855848
class TestDatetimeIndex(Base, tm.TestCase):
856849
_index_factory = lambda x: date_range

pandas/tseries/resample.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from pandas.tseries.period import PeriodIndex, period_range
1717
import pandas.core.common as com
1818
import pandas.core.algorithms as algos
19+
from pandas.types.generic import ABCDataFrame
1920

2021
import pandas.compat as compat
2122
from pandas.compat.numpy import function as nv
@@ -552,7 +553,13 @@ def var(self, ddof=1, *args, **kwargs):
552553
nv.validate_resampler_func('var', args, kwargs)
553554
return self._downsample('var', ddof=ddof)
554555

555-
556+
@Appender(GroupBy.size.__doc__)
557+
def size(self):
558+
# Resampling an empty DataFrame must still yield an empty int64 Series from size(), so build it explicitly. GH14962
559+
result = self._downsample('size')
560+
if not len(self.ax) and isinstance(self._selected_obj, ABCDataFrame):
561+
result = pd.Series([], index=result.index, dtype='int64')
562+
return result
556563
Resampler._deprecated_valids += dir(Resampler)
557564

558565
# downsample methods
@@ -566,8 +573,7 @@ def f(self, _method=method, *args, **kwargs):
566573
setattr(Resampler, method, f)
567574

568575
# groupby & aggregate methods
569-
for method in ['count', 'size']:
570-
576+
for method in ['count']:
571577
def f(self, _method=method):
572578
return self._downsample(_method)
573579
f.__doc__ = getattr(GroupBy, method).__doc__
@@ -773,14 +779,6 @@ def _wrap_result(self, result):
773779
if self.kind == 'period' and not isinstance(result.index, PeriodIndex):
774780
result.index = result.index.to_period(self.freq)
775781

776-
# Make consistent type of result. GH14962
777-
if not len(self.ax):
778-
grouper = BinGrouper([], result.index)
779-
grouped = self._selected_obj.groupby(grouper)
780-
result = pd.Series([],
781-
index=result.index,
782-
name=grouped.name,
783-
dtype='int64')
784782
return result
785783

786784

0 commit comments

Comments
 (0)