Skip to content

Commit c98b782

Browse files
Koustav-Samaddar authored and jreback committed
BUG: Fix groupby observed=True when aggregating a column (#24412)
1 parent a7d3648 commit c98b782

File tree

3 files changed: +31 -1 lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1621,6 +1621,7 @@ Groupby/Resample/Rolling
16211621
- Bug in :meth:`pandas.core.groupby.GroupBy.rank` with ``method='dense'`` and ``pct=True`` when a group has only one member would raise a ``ZeroDivisionError`` (:issue:`23666`).
16221622
- Calling :meth:`pandas.core.groupby.GroupBy.rank` with empty groups and ``pct=True`` was raising a ``ZeroDivisionError`` (:issue:`22519`)
16231623
- Bug in :meth:`DataFrame.resample` when resampling ``NaT`` in ``TimeDeltaIndex`` (:issue:`13223`).
1624+
- Bug in :meth:`DataFrame.groupby` where the ``observed`` argument was not respected when selecting a column, so that ``observed=False`` was always used instead (:issue:`23970`)
16241625

16251626
Reshaping
16261627
^^^^^^^^^

pandas/core/groupby/generic.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -1336,7 +1336,8 @@ def _gotitem(self, key, ndim, subset=None):
13361336
return DataFrameGroupBy(subset, self.grouper, selection=key,
13371337
grouper=self.grouper,
13381338
exclusions=self.exclusions,
1339-
as_index=self.as_index)
1339+
as_index=self.as_index,
1340+
observed=self.observed)
13401341
elif ndim == 1:
13411342
if subset is None:
13421343
subset = self.obj[key]

pandas/tests/groupby/test_categorical.py

+28
Original file line number | Diff line number | Diff line change
@@ -863,6 +863,34 @@ def test_groupby_multiindex_categorical_datetime():
863863
assert_frame_equal(result, expected)
864864

865865

866+
@pytest.mark.parametrize(
    "as_index, expected",
    [
        # as_index=True: the group keys become a two-level MultiIndex and the
        # selected column comes back as a Series named after it.
        (
            True,
            pd.Series(
                data=[1, 2, 3],
                name='x',
                index=pd.MultiIndex.from_arrays(
                    [pd.Series([1, 1, 2], dtype='category'), [1, 2, 2]],
                    names=['a', 'b'],
                ),
            ),
        ),
        # as_index=False: the group keys stay as ordinary columns of a frame.
        (
            False,
            pd.DataFrame({
                'a': pd.Series([1, 1, 2], dtype='category'),
                'b': [1, 2, 2],
                'x': [1, 2, 3],
            }),
        ),
    ],
)
def test_groupby_agg_observed_true_single_column(as_index, expected):
    """Check that ``observed=True`` survives selecting a single column.

    Regression test for GH-23970.  The frame is grouped by a categorical
    column ``a`` and a plain column ``b`` with ``observed=True``; column
    ``x`` is then selected and summed.  Both expected results contain only
    the three (a, b) combinations that occur in the data.
    """
    frame = pd.DataFrame({
        'a': pd.Series([1, 1, 2], dtype='category'),
        'b': [1, 2, 2],
        'x': [1, 2, 3],
    })

    grouped = frame.groupby(['a', 'b'], as_index=as_index, observed=True)
    result = grouped['x'].sum()

    assert_equal(result, expected)
892+
893+
866894
@pytest.mark.parametrize('fill_value', [None, np.nan, pd.NaT])
867895
def test_shift(fill_value):
868896
ct = pd.Categorical(['a', 'b', 'c', 'd'],

0 commit comments

Comments
 (0)