Skip to content

Commit bcb9e1b

Browse files
authored
BUG: Attributes are lost when subsetting columns in groupby (#35444)
1 parent 668724b commit bcb9e1b

File tree

3 files changed

+61
-2
lines changed

3 files changed

+61
-2
lines changed

doc/source/whatsnew/v1.2.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,8 @@ Groupby/resample/rolling
255255
- Bug when combining methods :meth:`DataFrame.groupby` with :meth:`DataFrame.resample` and :meth:`DataFrame.interpolate` raising a ``TypeError`` (:issue:`35325`)
256256
- Bug in :meth:`DataFrameGroupBy.apply` where a non-nuisance grouping column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`)
257257
- Bug in :meth:`DataFrameGroupBy.apply` where a :class:`CategoricalIndex` would be dropped when grouped on (:issue:`35792`)
258+
- Bug where subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')[['b']]``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values (:issue:`9959`)
259+
-
258260

259261
Reshaping
260262
^^^^^^^^^

pandas/core/groupby/generic.py

+17-2
Original file line numberDiff line numberDiff line change
@@ -1603,17 +1603,32 @@ def _gotitem(self, key, ndim: int, subset=None):
16031603
return DataFrameGroupBy(
16041604
subset,
16051605
self.grouper,
1606-
selection=key,
1606+
axis=self.axis,
1607+
level=self.level,
16071608
grouper=self.grouper,
16081609
exclusions=self.exclusions,
1610+
selection=key,
16091611
as_index=self.as_index,
1612+
sort=self.sort,
1613+
group_keys=self.group_keys,
1614+
squeeze=self.squeeze,
16101615
observed=self.observed,
1616+
mutated=self.mutated,
1617+
dropna=self.dropna,
16111618
)
16121619
elif ndim == 1:
16131620
if subset is None:
16141621
subset = self.obj[key]
16151622
return SeriesGroupBy(
1616-
subset, selection=key, grouper=self.grouper, observed=self.observed
1623+
subset,
1624+
level=self.level,
1625+
grouper=self.grouper,
1626+
selection=key,
1627+
sort=self.sort,
1628+
group_keys=self.group_keys,
1629+
squeeze=self.squeeze,
1630+
observed=self.observed,
1631+
dropna=self.dropna,
16171632
)
16181633

16191634
raise AssertionError("invalid ndim for _gotitem")

pandas/tests/groupby/test_groupby.py

+42
Original file line numberDiff line numberDiff line change
@@ -2069,3 +2069,45 @@ def test_group_on_two_row_multiindex_returns_one_tuple_key():
20692069
assert len(result) == 1
20702070
key = (1, 2)
20712071
assert (result[key] == expected[key]).all()
2072+
2073+
2074+
@pytest.mark.parametrize(
2075+
"klass, attr, value",
2076+
[
2077+
(DataFrame, "axis", 1),
2078+
(DataFrame, "level", "a"),
2079+
(DataFrame, "as_index", False),
2080+
(DataFrame, "sort", False),
2081+
(DataFrame, "group_keys", False),
2082+
(DataFrame, "squeeze", True),
2083+
(DataFrame, "observed", True),
2084+
(DataFrame, "dropna", False),
2085+
pytest.param(
2086+
Series,
2087+
"axis",
2088+
1,
2089+
marks=pytest.mark.xfail(
2090+
reason="GH 35443: Attribute currently not passed on to series"
2091+
),
2092+
),
2093+
(Series, "level", "a"),
2094+
(Series, "as_index", False),
2095+
(Series, "sort", False),
2096+
(Series, "group_keys", False),
2097+
(Series, "squeeze", True),
2098+
(Series, "observed", True),
2099+
(Series, "dropna", False),
2100+
],
2101+
)
2102+
@pytest.mark.filterwarnings(
2103+
"ignore:The `squeeze` parameter is deprecated:FutureWarning"
2104+
)
2105+
def test_subsetting_columns_keeps_attrs(klass, attr, value):
2106+
# GH 9959 - When subsetting columns, don't drop attributes
2107+
df = pd.DataFrame({"a": [1], "b": [2], "c": [3]})
2108+
if attr != "axis":
2109+
df = df.set_index("a")
2110+
2111+
expected = df.groupby("a", **{attr: value})
2112+
result = expected[["b"]] if klass is DataFrame else expected["b"]
2113+
assert getattr(result, attr) == getattr(expected, attr)

0 commit comments

Comments
 (0)