Skip to content

Commit f194094

Browse files
authored
BUG: DataFrameGroupBy.__getitem__ fails to propagate dropna (#35078)
1 parent 067f86f commit f194094

File tree

3 files changed

+43
-11
lines changed

3 files changed

+43
-11
lines changed

doc/source/whatsnew/v1.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ Indexing
128128
Missing
129129
^^^^^^^
130130

131-
-
131+
- Bug in :meth:`SeriesGroupBy.transform` where missing values were handled incorrectly with ``dropna=False`` (:issue:`35014`)
132132
-
133133

134134
MultiIndex

pandas/core/groupby/generic.py

+8-10
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,11 @@
3535
from pandas.util._decorators import Appender, Substitution, doc
3636

3737
from pandas.core.dtypes.cast import (
38+
find_common_type,
3839
maybe_cast_result,
3940
maybe_cast_result_dtype,
4041
maybe_convert_objects,
4142
maybe_downcast_numeric,
42-
maybe_downcast_to_dtype,
4343
)
4444
from pandas.core.dtypes.common import (
4545
ensure_int64,
@@ -513,7 +513,6 @@ def _transform_general(
513513
"""
514514
Transform with a non-str `func`.
515515
"""
516-
517516
if maybe_use_numba(engine):
518517
numba_func, cache_key = generate_numba_func(
519518
func, engine_kwargs, kwargs, "groupby_transform"
@@ -535,24 +534,23 @@ def _transform_general(
535534
if isinstance(res, (ABCDataFrame, ABCSeries)):
536535
res = res._values
537536

538-
indexer = self._get_index(name)
539-
ser = klass(res, indexer)
540-
results.append(ser)
537+
results.append(klass(res, index=group.index))
541538

542539
# check for empty "results" to avoid concat ValueError
543540
if results:
544541
from pandas.core.reshape.concat import concat
545542

546-
result = concat(results).sort_index()
543+
concatenated = concat(results)
544+
result = self._set_result_index_ordered(concatenated)
547545
else:
548546
result = self.obj._constructor(dtype=np.float64)
549-
550547
# we will only try to coerce the result type if
551548
# we have a numeric dtype, as these are *always* user-defined funcs
552549
# the cython take a different path (and casting)
553-
dtype = self._selected_obj.dtype
554-
if is_numeric_dtype(dtype):
555-
result = maybe_downcast_to_dtype(result, dtype)
550+
if is_numeric_dtype(result.dtype):
551+
common_dtype = find_common_type([self._selected_obj.dtype, result.dtype])
552+
if common_dtype is result.dtype:
553+
result = maybe_downcast_numeric(result, self._selected_obj.dtype)
556554

557555
result.name = self._selected_obj.name
558556
result.index = self._selected_obj.index

pandas/tests/groupby/test_groupby_dropna.py

+34
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,40 @@ def test_groupby_dropna_series_by(dropna, expected):
162162
tm.assert_series_equal(result, expected)
163163

164164

165+
@pytest.mark.parametrize(
    "dropna,df_expected,s_expected",
    [
        # With dropna=True this case is currently expected to fail with a
        # ValueError, hence the xfail mark.
        pytest.param(
            True,
            pd.DataFrame({"B": [2, 2, 1]}),
            pd.Series(data=[2, 2, 1], name="B"),
            marks=pytest.mark.xfail(raises=ValueError),
        ),
        (
            False,
            pd.DataFrame({"B": [2, 2, 1, 1]}),
            pd.Series(data=[2, 2, 1, 1], name="B"),
        ),
    ],
)
def test_slice_groupby_then_transform(dropna, df_expected, s_expected):
    """
    Check that ``transform`` gives the same result before and after slicing.

    A frame is grouped by column "A" (which contains a missing key) with the
    parametrized ``dropna`` setting.  ``transform(len)`` is then evaluated on
    the full groupby, on the list-sliced groupby ``gb[["B"]]`` and on the
    scalar-sliced groupby ``gb["B"]``; each result is compared with the
    expected frame or series.
    """
    # GH35014
    df = pd.DataFrame({"A": [0, 0, 1, None], "B": [1, 2, 3, None]})
    gb = df.groupby("A", dropna=dropna)

    # Unsliced groupby.
    res = gb.transform(len)
    tm.assert_frame_equal(res, df_expected)

    # List selection keeps a frame-like groupby.
    gb_slice = gb[["B"]]
    res = gb_slice.transform(len)
    tm.assert_frame_equal(res, df_expected)

    # Scalar selection yields a series-like groupby; use the sliced object
    # itself so the variable under test is the one actually exercised.
    gb_slice = gb["B"]
    res = gb_slice.transform(len)
    tm.assert_series_equal(res, s_expected)
197+
198+
165199
@pytest.mark.parametrize(
166200
"dropna, tuples, outputs",
167201
[

0 commit comments

Comments
 (0)