Skip to content

Commit 45f54d6

Browse files
committed
whatsnew 1.3.3, move tests, restore mypy
1 parent e141123 commit 45f54d6

File tree

4 files changed

+73
-61
lines changed

4 files changed

+73
-61
lines changed

doc/source/whatsnew/v1.3.3.rst

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ Fixed regressions
2626
- Fixed regression in :func:`is_list_like` where objects with ``__iter__`` set to ``None`` would be identified as iterable (:issue:`43373`)
2727
- Fixed regression in :meth:`.Resampler.aggregate` when used after column selection would raise if ``func`` is a list of aggregation functions (:issue:`42905`)
2828
- Fixed regression in :meth:`DataFrame.corr` where Kendall correlation would produce incorrect results for columns with repeated values (:issue:`43401`)
29+
- Fixed regression in :meth:`DataFrame.groupby` where aggregating a frame whose non-grouping columns are all non-numeric (e.g. object or timedelta dtype) dropped those columns from the result (:issue:`42395`, :issue:`43108`)
2930

3031
.. ---------------------------------------------------------------------------
3132

pandas/core/groupby/groupby.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -1152,11 +1152,14 @@ def _wrap_applied_output(self, data, keys, values, not_indexed_same: bool = Fals
11521152
def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool:
11531153
"""
11541154
Determine subclass-specific default value for 'numeric_only'.
1155+
11551156
For SeriesGroupBy we want the default to be False (to match Series behavior).
11561157
For DataFrameGroupBy we want it to be True (for backwards-compat).
1158+
11571159
Parameters
11581160
----------
11591161
numeric_only : bool or lib.no_default
1162+
11601163
Returns
11611164
-------
11621165
bool
@@ -1167,14 +1170,19 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool:
11671170
if self.obj.ndim == 2:
11681171
# i.e. DataFrameGroupBy
11691172
numeric_only = True
1173+
# GH#42395 GH#43108 GH#43154
1174+
# Regression from 1.2.5 to 1.3 caused object columns to be dropped
11701175
obj = self._obj_with_exclusions
11711176
check = obj._get_numeric_data()
11721177
if len(obj.columns) and not len(check.columns) and not obj.empty:
11731178
numeric_only = False
1179+
# TODO: v1.4+ Add FutureWarning
11741180

11751181
else:
11761182
numeric_only = False
1177-
return numeric_only
1183+
# error: Incompatible return value type (got "Union[bool, NoDefault]",
1184+
# expected "bool")
1185+
return numeric_only # type: ignore[return-value]
11781186

11791187
@cache_readonly
11801188
def _group_keys_index(self) -> Index:

pandas/tests/groupby/test_function.py

-60
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,8 @@
1010
from pandas import (
1111
DataFrame,
1212
Index,
13-
Int64Index,
1413
MultiIndex,
1514
Series,
16-
Timedelta,
1715
Timestamp,
1816
date_range,
1917
)
@@ -264,64 +262,6 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
264262

265263
tm.assert_index_equal(result.columns, expected_columns)
266264

267-
def test_groupby_aggregation_non_numeric_dtype(self):
268-
# GH #43108
269-
df = DataFrame(
270-
[["M", [1]], ["M", [1]], ["W", [10]], ["W", [20]]], columns=["MW", "v"]
271-
)
272-
273-
expected = DataFrame(
274-
{
275-
"v": [[1, 1], [10, 20]],
276-
},
277-
index=Index(["M", "W"], dtype="object", name="MW"),
278-
)
279-
280-
gb = df.groupby(by=["MW"])
281-
result = gb.sum()
282-
tm.assert_frame_equal(result, expected)
283-
284-
def test_groupby_aggregation_multi_non_numeric_dtype(self):
285-
# GH #42395
286-
df = DataFrame(
287-
{
288-
"x": [1, 0, 1, 1, 0],
289-
"y": [Timedelta(i, "days") for i in range(1, 6)],
290-
"z": [Timedelta(i * 10, "days") for i in range(1, 6)],
291-
}
292-
)
293-
294-
expected = DataFrame(
295-
{
296-
"y": [Timedelta(i, "days") for i in range(7, 9)],
297-
"z": [Timedelta(i * 10, "days") for i in range(7, 9)],
298-
},
299-
index=Int64Index([0, 1], dtype="int64", name="x"),
300-
)
301-
302-
gb = df.groupby(by=["x"])
303-
result = gb.sum()
304-
tm.assert_frame_equal(result, expected)
305-
306-
def test_groupby_aggregation_numeric_with_non_numeric_dtype(self):
307-
# GH #43108
308-
df = DataFrame(
309-
{
310-
"x": [1, 0, 1, 1, 0],
311-
"y": [Timedelta(i, "days") for i in range(1, 6)],
312-
"z": [i for i in range(1, 6)],
313-
}
314-
)
315-
316-
expected = DataFrame(
317-
{"z": [7, 8]},
318-
index=Int64Index([0, 1], dtype="int64", name="x"),
319-
)
320-
321-
gb = df.groupby(by=["x"])
322-
result = gb.sum()
323-
tm.assert_frame_equal(result, expected)
324-
325265

326266
class TestGroupByNonCythonPaths:
327267
# GH#5610 non-cython calls should not include the grouper

pandas/tests/groupby/test_groupby.py

+63
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@
1414
DataFrame,
1515
Grouper,
1616
Index,
17+
Int64Index,
1718
MultiIndex,
1819
RangeIndex,
1920
Series,
21+
Timedelta,
2022
Timestamp,
2123
date_range,
2224
read_csv,
@@ -2392,6 +2394,67 @@ def test_groupby_empty_multi_column(as_index, numeric_only):
23922394
tm.assert_frame_equal(result, expected)
23932395

23942396

2397+
def test_groupby_aggregation_non_numeric_dtype():
2398+
# GH #43108
2399+
df = DataFrame(
2400+
[["M", [1]], ["M", [1]], ["W", [10]], ["W", [20]]], columns=["MW", "v"]
2401+
)
2402+
2403+
expected = DataFrame(
2404+
{
2405+
"v": [[1, 1], [10, 20]],
2406+
},
2407+
index=Index(["M", "W"], dtype="object", name="MW"),
2408+
)
2409+
2410+
gb = df.groupby(by=["MW"])
2411+
result = gb.sum()
2412+
tm.assert_frame_equal(result, expected)
2413+
2414+
2415+
def test_groupby_aggregation_multi_non_numeric_dtype():
2416+
# GH #42395
2417+
df = DataFrame(
2418+
{
2419+
"x": [1, 0, 1, 1, 0],
2420+
"y": [Timedelta(i, "days") for i in range(1, 6)],
2421+
"z": [Timedelta(i * 10, "days") for i in range(1, 6)],
2422+
}
2423+
)
2424+
2425+
expected = DataFrame(
2426+
{
2427+
"y": [Timedelta(i, "days") for i in range(7, 9)],
2428+
"z": [Timedelta(i * 10, "days") for i in range(7, 9)],
2429+
},
2430+
index=Int64Index([0, 1], dtype="int64", name="x"),
2431+
)
2432+
2433+
gb = df.groupby(by=["x"])
2434+
result = gb.sum()
2435+
tm.assert_frame_equal(result, expected)
2436+
2437+
2438+
def test_groupby_aggregation_numeric_with_non_numeric_dtype():
2439+
# GH #43108
2440+
df = DataFrame(
2441+
{
2442+
"x": [1, 0, 1, 1, 0],
2443+
"y": [Timedelta(i, "days") for i in range(1, 6)],
2444+
"z": list(range(1, 6)),
2445+
}
2446+
)
2447+
2448+
expected = DataFrame(
2449+
{"z": [7, 8]},
2450+
index=Int64Index([0, 1], dtype="int64", name="x"),
2451+
)
2452+
2453+
gb = df.groupby(by=["x"])
2454+
result = gb.sum()
2455+
tm.assert_frame_equal(result, expected)
2456+
2457+
23952458
def test_groupby_filtered_df_std():
23962459
# GH 16174
23972460
dicts = [

0 commit comments

Comments
 (0)