Skip to content

Commit 7d790cf

Browse files
authored
Updating _resolve_numeric_only function of GroupBy (#43154)
1 parent a951998 commit 7d790cf

File tree

4 files changed

+73
-1
lines changed

4 files changed

+73
-1
lines changed

doc/source/whatsnew/v1.3.3.rst

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ Fixed regressions
2727
- Fixed regression in :meth:`DataFrame.__getitem__` raising an error for a slice of :class:`DatetimeIndex` when the index is non-monotonic (:issue:`43223`)
2828
- Fixed regression in :meth:`.Resampler.aggregate` where using it after column selection would raise if ``func`` is a list of aggregation functions (:issue:`42905`)
2929
- Fixed regression in :meth:`DataFrame.corr` where Kendall correlation would produce incorrect results for columns with repeated values (:issue:`43401`)
30+
- Fixed regression in :meth:`DataFrame.groupby` where aggregation on columns with object types dropped results on those columns (:issue:`42395`, :issue:`43108`)
3031
- Fixed regression in :meth:`Series.fillna` raising ``TypeError`` when filling a ``float`` ``Series`` with a list-like fill value having a dtype which couldn't be cast losslessly (like ``float32`` filled with ``float64``) (:issue:`43424`)
3132
-
3233

pandas/core/groupby/groupby.py

+8
Original file line numberDiff line numberDiff line change
@@ -1195,6 +1195,14 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool:
11951195
if self.obj.ndim == 2:
11961196
# i.e. DataFrameGroupBy
11971197
numeric_only = True
1198+
# GH#42395 GH#43108 GH#43154
1199+
# Regression from 1.2.5 to 1.3 caused object columns to be dropped
1200+
obj = self._obj_with_exclusions
1201+
check = obj._get_numeric_data()
1202+
if len(obj.columns) and not len(check.columns) and not obj.empty:
1203+
numeric_only = False
1204+
# TODO: v1.4+ Add FutureWarning
1205+
11981206
else:
11991207
numeric_only = False
12001208

pandas/tests/groupby/aggregate/test_cython.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,8 @@ def test_cython_agg_nothing_to_agg():
9797

9898
frame = DataFrame({"a": np.random.randint(0, 5, 50), "b": ["foo", "bar"] * 25})
9999

100-
result = frame[["b"]].groupby(frame["a"]).mean()
100+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
101+
result = frame[["b"]].groupby(frame["a"]).mean()
101102
expected = DataFrame([], index=frame["a"].sort_values().drop_duplicates())
102103
tm.assert_frame_equal(result, expected)
103104

pandas/tests/groupby/test_groupby.py

+62
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
MultiIndex,
1818
RangeIndex,
1919
Series,
20+
Timedelta,
2021
Timestamp,
2122
date_range,
2223
read_csv,
@@ -2392,6 +2393,67 @@ def test_groupby_empty_multi_column(as_index, numeric_only):
23922393
tm.assert_frame_equal(result, expected)
23932394

23942395

2396+
def test_groupby_aggregation_non_numeric_dtype():
2397+
# GH #43108
2398+
df = DataFrame(
2399+
[["M", [1]], ["M", [1]], ["W", [10]], ["W", [20]]], columns=["MW", "v"]
2400+
)
2401+
2402+
expected = DataFrame(
2403+
{
2404+
"v": [[1, 1], [10, 20]],
2405+
},
2406+
index=Index(["M", "W"], dtype="object", name="MW"),
2407+
)
2408+
2409+
gb = df.groupby(by=["MW"])
2410+
result = gb.sum()
2411+
tm.assert_frame_equal(result, expected)
2412+
2413+
2414+
def test_groupby_aggregation_multi_non_numeric_dtype():
2415+
# GH #42395
2416+
df = DataFrame(
2417+
{
2418+
"x": [1, 0, 1, 1, 0],
2419+
"y": [Timedelta(i, "days") for i in range(1, 6)],
2420+
"z": [Timedelta(i * 10, "days") for i in range(1, 6)],
2421+
}
2422+
)
2423+
2424+
expected = DataFrame(
2425+
{
2426+
"y": [Timedelta(i, "days") for i in range(7, 9)],
2427+
"z": [Timedelta(i * 10, "days") for i in range(7, 9)],
2428+
},
2429+
index=Index([0, 1], dtype="int64", name="x"),
2430+
)
2431+
2432+
gb = df.groupby(by=["x"])
2433+
result = gb.sum()
2434+
tm.assert_frame_equal(result, expected)
2435+
2436+
2437+
def test_groupby_aggregation_numeric_with_non_numeric_dtype():
2438+
# GH #43108
2439+
df = DataFrame(
2440+
{
2441+
"x": [1, 0, 1, 1, 0],
2442+
"y": [Timedelta(i, "days") for i in range(1, 6)],
2443+
"z": list(range(1, 6)),
2444+
}
2445+
)
2446+
2447+
expected = DataFrame(
2448+
{"z": [7, 8]},
2449+
index=Index([0, 1], dtype="int64", name="x"),
2450+
)
2451+
2452+
gb = df.groupby(by=["x"])
2453+
result = gb.sum()
2454+
tm.assert_frame_equal(result, expected)
2455+
2456+
23952457
def test_groupby_filtered_df_std():
23962458
# GH 16174
23972459
dicts = [

0 commit comments

Comments
 (0)