Skip to content

BUG: DataFrame.resample changing index type to MultiIndex when dataframe is empty and upsampling #55772

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 11 commits into from
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Bug fixes
- Bug in :class:`Series` constructor raising DeprecationWarning when ``index`` is a list of :class:`Series` (:issue:`55228`)
- Bug in :class:`Series` when trying to cast date-like string inputs to :class:`ArrowDtype` of ``pyarrow.timestamp`` (:issue:`56266`)
- Bug in :class:`Timestamp` construction with ``ts_input="now"`` or ``ts_input="today"`` giving a different unit from :meth:`Timestamp.now` or :meth:`Timestamp.today` (:issue:`55879`)
- Bug in :meth:`DataFrame.resample` changing the index type to :class:`MultiIndex` when the DataFrame is empty and an upsampling method is used (:issue:`55572`)
- Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`)
- Fixed bug in :func:`read_csv` not respecting object dtype when ``infer_string`` option is set (:issue:`56047`)
- Fixed bug in :func:`to_numeric` converting to extension dtype for ``string[pyarrow_numpy]`` dtype (:issue:`56179`)
Expand Down
23 changes: 12 additions & 11 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,22 +494,12 @@ def _wrap_result(self, result):
"""
Potentially wrap any results.
"""
# GH 47705
obj = self.obj
if (
isinstance(result, ABCDataFrame)
and len(result) == 0
and not isinstance(result.index, PeriodIndex)
):
result = result.set_index(
_asfreq_compat(obj.index[:0], freq=self.freq), append=True
)

if isinstance(result, ABCSeries) and self._selection is not None:
result.name = self._selection

if isinstance(result, ABCSeries) and result.empty:
# When index is all NaT, result is empty but index is not
obj = self.obj
result.index = _asfreq_compat(obj.index[:0], freq=self.freq)
result.name = getattr(obj, "name", None)

Expand Down Expand Up @@ -1675,6 +1665,17 @@ def func(x):
return x.apply(f, *args, **kwargs)

result = _apply(self._groupby, func, include_groups=self.include_groups)

# GH 47705
if (
isinstance(result, ABCDataFrame)
and len(result) == 0
and not isinstance(result.index, PeriodIndex)
):
result = result.set_index(
_asfreq_compat(self.obj.index[:0], freq=self.freq), append=True
)

return self._wrap_result(result)

_upsample = _apply
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/resample/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,33 @@ def test_resample_size_empty_dataframe(freq, empty_frame_dti):
tm.assert_series_equal(result, expected)


@all_ts
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
@pytest.mark.parametrize(
    "method", ["ffill", "bfill", "nearest", "asfreq", "interpolate"]
)
def test_resample_upsample_empty_dataframe(freq, method, empty_frame_dti):
    """
    Upsampling an empty frame must yield an empty frame on the resampled index.

    GH#55572: for every parametrized upsampling ``method`` the result is
    compared against an empty ``DataFrame`` whose index is
    ``_asfreq_compat(empty_frame_dti.index, freq)``.

    Special handling of the ``"ME"`` frequency:

    * ``TimedeltaIndex``: ``resample`` itself is expected to raise
      ``ValueError``, so the test only checks the error and stops.
    * ``PeriodIndex``: the frequency is swapped for the period alias ``"M"``
      before resampling.
    """
    source_index = empty_frame_dti.index

    if freq == "ME":
        if isinstance(source_index, TimedeltaIndex):
            # Month-end is not a fixed duration, so the resample call fails
            # before any upsampling method could be applied.
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                empty_frame_dti.resample(freq)
            return
        if isinstance(source_index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    upsampled = getattr(empty_frame_dti.resample(freq), method)()
    expected = DataFrame([], index=_asfreq_compat(source_index, freq))

    tm.assert_frame_equal(upsampled, expected)


@pytest.mark.parametrize(
"index",
[
Expand Down