Skip to content

Commit a00c882

Browse files
rhshadrach authored and jbrockmendel committed
BUG: Attributes are lost when subsetting columns in groupby (pandas-dev#35444)
1 parent c1fa680 commit a00c882

File tree

3 files changed

+61
-2
lines changed

3 files changed

+61
-2
lines changed

doc/source/whatsnew/v1.2.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,9 @@ Groupby/resample/rolling
255255
- Bug when combining methods :meth:`DataFrame.groupby` with :meth:`DataFrame.resample` and :meth:`DataFrame.interpolate` raising a ``TypeError`` (:issue:`35325`)
256256
- Bug in :meth:`DataFrameGroupBy.apply` where a non-nuisance grouping column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`)
257257
- Bug in :meth:`DataFrameGroupBy.apply` where a :class:`CategoricalIndex` would be dropped when grouped on (:issue:`35792`)
258+
- Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')[['b']]``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values. (:issue:`9959`)
258259
- Bug in :meth:`DataFrameGroupBy.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`)
260+
-
259261

260262
Reshaping
261263
^^^^^^^^^

pandas/core/groupby/generic.py

+17-2
Original file line numberDiff line numberDiff line change
@@ -1659,17 +1659,32 @@ def _gotitem(self, key, ndim: int, subset=None):
16591659
return DataFrameGroupBy(
16601660
subset,
16611661
self.grouper,
1662-
selection=key,
1662+
axis=self.axis,
1663+
level=self.level,
16631664
grouper=self.grouper,
16641665
exclusions=self.exclusions,
1666+
selection=key,
16651667
as_index=self.as_index,
1668+
sort=self.sort,
1669+
group_keys=self.group_keys,
1670+
squeeze=self.squeeze,
16661671
observed=self.observed,
1672+
mutated=self.mutated,
1673+
dropna=self.dropna,
16671674
)
16681675
elif ndim == 1:
16691676
if subset is None:
16701677
subset = self.obj[key]
16711678
return SeriesGroupBy(
1672-
subset, selection=key, grouper=self.grouper, observed=self.observed
1679+
subset,
1680+
level=self.level,
1681+
grouper=self.grouper,
1682+
selection=key,
1683+
sort=self.sort,
1684+
group_keys=self.group_keys,
1685+
squeeze=self.squeeze,
1686+
observed=self.observed,
1687+
dropna=self.dropna,
16731688
)
16741689

16751690
raise AssertionError("invalid ndim for _gotitem")

pandas/tests/groupby/test_groupby.py

+42
Original file line numberDiff line numberDiff line change
@@ -2069,3 +2069,45 @@ def test_group_on_two_row_multiindex_returns_one_tuple_key():
20692069
assert len(result) == 1
20702070
key = (1, 2)
20712071
assert (result[key] == expected[key]).all()
2072+
2073+
2074+
@pytest.mark.parametrize(
2075+
"klass, attr, value",
2076+
[
2077+
(DataFrame, "axis", 1),
2078+
(DataFrame, "level", "a"),
2079+
(DataFrame, "as_index", False),
2080+
(DataFrame, "sort", False),
2081+
(DataFrame, "group_keys", False),
2082+
(DataFrame, "squeeze", True),
2083+
(DataFrame, "observed", True),
2084+
(DataFrame, "dropna", False),
2085+
pytest.param(
2086+
Series,
2087+
"axis",
2088+
1,
2089+
marks=pytest.mark.xfail(
2090+
reason="GH 35443: Attribute currently not passed on to series"
2091+
),
2092+
),
2093+
(Series, "level", "a"),
2094+
(Series, "as_index", False),
2095+
(Series, "sort", False),
2096+
(Series, "group_keys", False),
2097+
(Series, "squeeze", True),
2098+
(Series, "observed", True),
2099+
(Series, "dropna", False),
2100+
],
2101+
)
2102+
@pytest.mark.filterwarnings(
2103+
"ignore:The `squeeze` parameter is deprecated:FutureWarning"
2104+
)
2105+
def test_subsetting_columns_keeps_attrs(klass, attr, value):
2106+
# GH 9959 - When subsetting columns, don't drop attributes
2107+
df = pd.DataFrame({"a": [1], "b": [2], "c": [3]})
2108+
if attr != "axis":
2109+
df = df.set_index("a")
2110+
2111+
expected = df.groupby("a", **{attr: value})
2112+
result = expected[["b"]] if klass is DataFrame else expected["b"]
2113+
assert getattr(result, attr) == getattr(expected, attr)

0 commit comments

Comments
 (0)