BUG: resample.apply with non-unique columns #41445

Merged · 2 commits · May 13, 2021
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
@@ -901,6 +901,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupBy.rank` with the GroupBy object's ``axis=0`` and the ``rank`` method's keyword ``axis=1`` (:issue:`41320`)
 - Bug in :meth:`DataFrameGroupBy.__getitem__` with non-unique columns incorrectly returning a malformed :class:`SeriesGroupBy` instead of :class:`DataFrameGroupBy` (:issue:`41427`)
 - Bug in :meth:`DataFrameGroupBy.transform` with non-unique columns incorrectly raising ``AttributeError`` (:issue:`41427`)
+- Bug in :meth:`Resampler.apply` with non-unique columns incorrectly dropping duplicated columns (:issue:`41445`)
 
 Reshaping
 ^^^^^^^^^
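For context on the entry above, a minimal reproduction sketch of the reported bug (hypothetical frame, not taken from the PR; the pre-fix behavior is paraphrased from the whatsnew note):

```python
import numpy as np
import pandas as pd

idx = pd.date_range("2021-01-01", periods=4, freq="D")
df = pd.DataFrame(np.arange(8).reshape(4, 2), index=idx, columns=["A", "A"])

# Per the whatsnew entry, resample(...).apply(...) used to drop one of the
# duplicated "A" columns; with this fix both columns survive.
res = df.resample("2D").apply(np.sum)
print(res.shape)  # (2, 2) once the fix is applied
```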
21 changes: 14 additions & 7 deletions pandas/core/groupby/generic.py
@@ -1106,6 +1106,7 @@ def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
 
         result: dict[Hashable, NDFrame | np.ndarray] = {}
         if axis != obj._info_axis_number:
+            # test_pass_args_kwargs_duplicate_columns gets here with non-unique columns
             for name, data in self:
                 fres = func(data, *args, **kwargs)
                 result[name] = fres
@@ -1119,18 +1120,23 @@ def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
 
     def _aggregate_item_by_item(self, func, *args, **kwargs) -> DataFrame:
         # only for axis==0
+        # tests that get here with non-unique cols:
+        #  test_resample_with_timedelta_yields_no_empty_groups,
+        #  test_resample_apply_product
 
         obj = self._obj_with_exclusions
         result: dict[int | str, NDFrame] = {}
-        for item in obj:
-            data = obj[item]
-            colg = SeriesGroupBy(data, selection=item, grouper=self.grouper)
-
-            result[item] = colg.aggregate(func, *args, **kwargs)
+        for i, item in enumerate(obj):
+            ser = obj.iloc[:, i]
+            colg = SeriesGroupBy(
+                ser, selection=item, grouper=self.grouper, exclusions=self.exclusions
+            )
 
-        result_columns = obj.columns
+            result[i] = colg.aggregate(func, *args, **kwargs)
 
-        return self.obj._constructor(result, columns=result_columns)
+        res_df = self.obj._constructor(result)
+        res_df.columns = obj.columns
+        return res_df
 
     def _wrap_applied_output(self, data, keys, values, not_indexed_same=False):
         if len(keys) == 0:
@@ -1401,6 +1407,7 @@ def _choose_path(self, fast_path: Callable, slow_path: Callable, group: DataFrame
 
     def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
         # iterate through columns, see test_transform_exclude_nuisance
+        # gets here with non-unique columns
         output = {}
         inds = []
         for i, col in enumerate(obj):
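The core of the fix above is the switch from label-based lookup (`obj[item]`, results keyed by label) to positional lookup (`obj.iloc[:, i]`, results keyed by position). A standalone sketch of the two pitfalls this avoids, using plain pandas rather than groupby internals:

```python
import pandas as pd

df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "A"])

# Label-based lookup returns a 2-column DataFrame, not a Series, because
# "A" is duplicated -- so a per-column SeriesGroupBy cannot be built from it.
print(type(df["A"]))        # <class 'pandas.core.frame.DataFrame'>
print(type(df.iloc[:, 0]))  # <class 'pandas.core.series.Series'>

# Keying results by label collapses duplicates; keying by position keeps them.
by_label = {col: df[col] for col in df}                       # 1 entry
by_position = {i: df.iloc[:, i] for i in range(df.shape[1])}  # 2 entries
print(len(by_label), len(by_position))  # 1 2
```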
20 changes: 20 additions & 0 deletions pandas/tests/groupby/test_groupby.py
@@ -248,6 +248,26 @@ def f(x, q=None, axis=0):
     tm.assert_frame_equal(apply_result, expected, check_names=False)
 
 
+@pytest.mark.parametrize("as_index", [True, False])
+def test_pass_args_kwargs_duplicate_columns(tsframe, as_index):
+    # go through _aggregate_frame with self.axis == 0 and duplicate columns
+    tsframe.columns = ["A", "B", "A", "C"]
+    gb = tsframe.groupby(lambda x: x.month, as_index=as_index)
+
+    res = gb.agg(np.percentile, 80, axis=0)
+
+    ex_data = {
+        1: tsframe[tsframe.index.month == 1].quantile(0.8),
+        2: tsframe[tsframe.index.month == 2].quantile(0.8),
+    }
+    expected = DataFrame(ex_data).T
+    if not as_index:
+        # TODO: try to get this more consistent?
+        expected.index = Index(range(2))
+
+    tm.assert_frame_equal(res, expected)
+
+
 def test_len():
     df = tm.makeTimeDataFrame()
     grouped = df.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day])
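A note on the pattern the new test exercises: positional and keyword arguments given to `.agg` after the function are forwarded to that function for each group, which is why `np.percentile` with `80` lines up with `quantile(0.8)`. A small sketch with a made-up frame:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"x": range(10)})
g = df.groupby(df.index % 2)

# The 80 is passed through to np.percentile for every group.
res = g.agg(np.percentile, 80)
print(np.allclose(res, g.quantile(0.8)))  # True: same linear interpolation
```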
8 changes: 6 additions & 2 deletions pandas/tests/resample/test_datetime_index.py
@@ -1748,19 +1748,23 @@ def test_get_timestamp_range_edges(first, last, freq, exp_first, exp_last):
     assert result == expected
 
 
-def test_resample_apply_product():
+@pytest.mark.parametrize("duplicates", [True, False])
+def test_resample_apply_product(duplicates):
     # GH 5586
     index = date_range(start="2012-01-31", freq="M", periods=12)
 
     ts = Series(range(12), index=index)
     df = DataFrame({"A": ts, "B": ts + 2})
+    if duplicates:
+        df.columns = ["A", "A"]
 
     result = df.resample("Q").apply(np.product)
     expected = DataFrame(
         np.array([[0, 24], [60, 210], [336, 720], [990, 1716]], dtype=np.int64),
         index=DatetimeIndex(
             ["2012-03-31", "2012-06-30", "2012-09-30", "2012-12-31"], freq="Q-DEC"
         ),
-        columns=["A", "B"],
+        columns=df.columns,
     )
     tm.assert_frame_equal(result, expected)
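A quick hand-check of the expected matrix above: column A holds 0 through 11 by month and B = A + 2, so the first and last quarters work out as follows (a sketch):

```python
import numpy as np

# Q1 rows: A = [0, 1, 2], B = [2, 3, 4]; Q4 rows: A = [9, 10, 11], B = [11, 12, 13]
assert np.prod([0, 1, 2]) == 0 and np.prod([2, 3, 4]) == 24            # first row [0, 24]
assert np.prod([9, 10, 11]) == 990 and np.prod([11, 12, 13]) == 1716   # last row [990, 1716]
```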
8 changes: 7 additions & 1 deletion pandas/tests/resample/test_timedelta.py
@@ -153,18 +153,24 @@ def test_resample_timedelta_edge_case(start, end, freq, resample_freq):
     assert not np.isnan(result[-1])
 
 
-def test_resample_with_timedelta_yields_no_empty_groups():
+@pytest.mark.parametrize("duplicates", [True, False])
+def test_resample_with_timedelta_yields_no_empty_groups(duplicates):
     # GH 10603
     df = DataFrame(
         np.random.normal(size=(10000, 4)),
         index=timedelta_range(start="0s", periods=10000, freq="3906250n"),
     )
+    if duplicates:
+        # case with non-unique columns
+        df.columns = ["A", "B", "A", "C"]
 
     result = df.loc["1s":, :].resample("3s").apply(lambda x: len(x))
 
     expected = DataFrame(
         [[768] * 4] * 12 + [[528] * 4],
         index=timedelta_range(start="1s", periods=13, freq="3s"),
     )
+    expected.columns = df.columns
     tm.assert_frame_equal(result, expected)
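Where the 768 and 528 in the expectation come from (a sketch of the arithmetic): the index step of 3906250 ns is exactly 1/256 of a second, so each 3-second bin holds 768 rows; slicing from "1s" drops the first 256 rows, and after 12 full bins the remainder is 528:

```python
# 3906250 ns = 1/256 s, so 3 seconds span exactly 768 rows.
rows_per_bin = 3_000_000_000 // 3_906_250   # 768
rows_from_1s = 10_000 - 256                 # rows at or after the "1s" mark
print(rows_per_bin, rows_from_1s - 12 * rows_per_bin)  # 768 528
```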