Skip to content

Commit 6cc8234

Browse files
jbrockmendel and jreback
authored and committed
CLN core.groupby (#29389)
1 parent b3490cb commit 6cc8234

File tree

6 files changed

+50
-34
lines changed

6 files changed

+50
-34
lines changed

pandas/core/base.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -586,9 +586,16 @@ def _aggregate_multiple_funcs(self, arg, _level, _axis):
586586
new_res = colg.aggregate(arg)
587587
except (TypeError, DataError):
588588
pass
589-
except ValueError:
589+
except ValueError as err:
590590
# cannot aggregate
591-
continue
591+
if "Must produce aggregated value" in str(err):
592+
# raised directly in _aggregate_named
593+
pass
594+
elif "no results" in str(err):
595+
# raised directly in _aggregate_multiple_funcs
596+
pass
597+
else:
598+
raise
592599
else:
593600
results.append(new_res)
594601
keys.append(col)

pandas/core/groupby/generic.py

+4-8
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ def aggregate(self, func=None, *args, **kwargs):
244244
if isinstance(func, str):
245245
return getattr(self, func)(*args, **kwargs)
246246

247-
if isinstance(func, abc.Iterable):
247+
elif isinstance(func, abc.Iterable):
248248
# Catch instances of lists / tuples
249249
# but not the class list / tuple itself.
250250
func = _maybe_mangle_lambdas(func)
@@ -261,8 +261,6 @@ def aggregate(self, func=None, *args, **kwargs):
261261

262262
try:
263263
return self._python_agg_general(func, *args, **kwargs)
264-
except (AssertionError, TypeError):
265-
raise
266264
except (ValueError, KeyError, AttributeError, IndexError):
267265
# TODO: IndexError can be removed here following GH#29106
268266
# TODO: AttributeError is caused by _index_data hijinx in
@@ -325,7 +323,7 @@ def _aggregate_multiple_funcs(self, arg, _level):
325323
if name in results:
326324
raise SpecificationError(
327325
"Function names must be unique, found multiple named "
328-
"{}".format(name)
326+
"{name}".format(name=name)
329327
)
330328

331329
# reset the cache so that we
@@ -1464,8 +1462,6 @@ def _transform_item_by_item(self, obj, wrapper):
14641462
for i, col in enumerate(obj):
14651463
try:
14661464
output[col] = self[col].transform(wrapper)
1467-
except AssertionError:
1468-
raise
14691465
except TypeError:
14701466
# e.g. trying to call nanmean with string values
14711467
pass
@@ -1538,8 +1534,8 @@ def filter(self, func, dropna=True, *args, **kwargs):
15381534
else:
15391535
# non scalars aren't allowed
15401536
raise TypeError(
1541-
"filter function returned a %s, "
1542-
"but expected a scalar bool" % type(res).__name__
1537+
"filter function returned a {typ}, "
1538+
"but expected a scalar bool".format(typ=type(res).__name__)
15431539
)
15441540

15451541
return self._apply_filter(indices, dropna)

pandas/core/groupby/groupby.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ def __init__(
344344
self,
345345
obj: NDFrame,
346346
keys=None,
347-
axis=0,
347+
axis: int = 0,
348348
level=None,
349349
grouper=None,
350350
exclusions=None,
@@ -561,7 +561,9 @@ def __getattr__(self, attr):
561561
return self[attr]
562562

563563
raise AttributeError(
564-
"%r object has no attribute %r" % (type(self).__name__, attr)
564+
"'{typ}' object has no attribute '{attr}'".format(
565+
typ=type(self).__name__, attr=attr
566+
)
565567
)
566568

567569
@Substitution(
@@ -2486,6 +2488,6 @@ def groupby(obj, by, **kwds):
24862488

24872489
klass = DataFrameGroupBy
24882490
else:
2489-
raise TypeError("invalid type: {}".format(obj))
2491+
raise TypeError("invalid type: {obj}".format(obj=obj))
24902492

24912493
return klass(obj, by, **kwds)

pandas/core/groupby/grouper.py

+19-9
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,9 @@ def _set_grouper(self, obj, sort=False):
172172
ax = self._grouper.take(obj.index)
173173
else:
174174
if key not in obj._info_axis:
175-
raise KeyError("The grouper name {0} is not found".format(key))
175+
raise KeyError(
176+
"The grouper name {key} is not found".format(key=key)
177+
)
176178
ax = Index(obj[key], name=key)
177179

178180
else:
@@ -188,7 +190,9 @@ def _set_grouper(self, obj, sort=False):
188190

189191
else:
190192
if level not in (0, ax.name):
191-
raise ValueError("The level {0} is not valid".format(level))
193+
raise ValueError(
194+
"The level {level} is not valid".format(level=level)
195+
)
192196

193197
# possibly sort
194198
if (self.sort or sort) and not ax.is_monotonic:
@@ -278,7 +282,9 @@ def __init__(
278282
if level is not None:
279283
if not isinstance(level, int):
280284
if level not in index.names:
281-
raise AssertionError("Level {} not in index".format(level))
285+
raise AssertionError(
286+
"Level {level} not in index".format(level=level)
287+
)
282288
level = index.names.index(level)
283289

284290
if self.name is None:
@@ -344,15 +350,17 @@ def __init__(
344350
):
345351
if getattr(self.grouper, "ndim", 1) != 1:
346352
t = self.name or str(type(self.grouper))
347-
raise ValueError("Grouper for '{}' not 1-dimensional".format(t))
353+
raise ValueError("Grouper for '{t}' not 1-dimensional".format(t=t))
348354
self.grouper = self.index.map(self.grouper)
349355
if not (
350356
hasattr(self.grouper, "__len__")
351357
and len(self.grouper) == len(self.index)
352358
):
353359
errmsg = (
354360
"Grouper result violates len(labels) == "
355-
"len(data)\nresult: %s" % pprint_thing(self.grouper)
361+
"len(data)\nresult: {grper}".format(
362+
grper=pprint_thing(self.grouper)
363+
)
356364
)
357365
self.grouper = None # Try for sanity
358366
raise AssertionError(errmsg)
@@ -426,7 +434,7 @@ def groups(self):
426434
def _get_grouper(
427435
obj: NDFrame,
428436
key=None,
429-
axis=0,
437+
axis: int = 0,
430438
level=None,
431439
sort=True,
432440
observed=False,
@@ -493,7 +501,9 @@ def _get_grouper(
493501
if isinstance(level, str):
494502
if obj.index.name != level:
495503
raise ValueError(
496-
"level name {} is not the name of the index".format(level)
504+
"level name {level} is not the name of the index".format(
505+
level=level
506+
)
497507
)
498508
elif level > 0 or level < -1:
499509
raise ValueError("level > 0 or level < -1 only valid with MultiIndex")
@@ -582,7 +592,7 @@ def _get_grouper(
582592
exclusions = []
583593

584594
# if the actual grouper should be obj[key]
585-
def is_in_axis(key):
595+
def is_in_axis(key) -> bool:
586596
if not _is_label_like(key):
587597
items = obj._data.items
588598
try:
@@ -594,7 +604,7 @@ def is_in_axis(key):
594604
return True
595605

596606
# if the grouper is obj[name]
597-
def is_in_obj(gpr):
607+
def is_in_obj(gpr) -> bool:
598608
if not hasattr(gpr, "name"):
599609
return False
600610
try:

pandas/core/groupby/ops.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
"""
88

99
import collections
10+
from typing import List, Optional
1011

1112
import numpy as np
1213

@@ -385,7 +386,7 @@ def get_func(fname):
385386

386387
return func
387388

388-
def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
389+
def _cython_operation(self, kind: str, values, how, axis, min_count=-1, **kwargs):
389390
assert kind in ["transform", "aggregate"]
390391
orig_values = values
391392

@@ -398,16 +399,18 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
398399
# categoricals are only 1d, so we
399400
# are not setup for dim transforming
400401
if is_categorical_dtype(values) or is_sparse(values):
401-
raise NotImplementedError("{} dtype not supported".format(values.dtype))
402+
raise NotImplementedError(
403+
"{dtype} dtype not supported".format(dtype=values.dtype)
404+
)
402405
elif is_datetime64_any_dtype(values):
403406
if how in ["add", "prod", "cumsum", "cumprod"]:
404407
raise NotImplementedError(
405-
"datetime64 type does not support {} operations".format(how)
408+
"datetime64 type does not support {how} operations".format(how=how)
406409
)
407410
elif is_timedelta64_dtype(values):
408411
if how in ["prod", "cumprod"]:
409412
raise NotImplementedError(
410-
"timedelta64 type does not support {} operations".format(how)
413+
"timedelta64 type does not support {how} operations".format(how=how)
411414
)
412415

413416
if is_datetime64tz_dtype(values.dtype):
@@ -513,7 +516,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
513516
result = result[:, 0]
514517

515518
if how in self._name_functions:
516-
names = self._name_functions[how]()
519+
names = self._name_functions[how]() # type: Optional[List[str]]
517520
else:
518521
names = None
519522

pandas/core/resample.py

+5-7
Original file line numberDiff line numberDiff line change
@@ -361,8 +361,6 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs):
361361
result = grouped._aggregate_item_by_item(how, *args, **kwargs)
362362
else:
363363
result = grouped.aggregate(how, *args, **kwargs)
364-
except AssertionError:
365-
raise
366364
except DataError:
367365
# we have a non-reducing function; try to evaluate
368366
result = grouped.apply(how, *args, **kwargs)
@@ -1450,7 +1448,7 @@ def _get_resampler(self, obj, kind=None):
14501448
raise TypeError(
14511449
"Only valid with DatetimeIndex, "
14521450
"TimedeltaIndex or PeriodIndex, "
1453-
"but got an instance of %r" % type(ax).__name__
1451+
"but got an instance of '{typ}'".format(typ=type(ax).__name__)
14541452
)
14551453

14561454
def _get_grouper(self, obj, validate=True):
@@ -1463,7 +1461,7 @@ def _get_time_bins(self, ax):
14631461
if not isinstance(ax, DatetimeIndex):
14641462
raise TypeError(
14651463
"axis must be a DatetimeIndex, but got "
1466-
"an instance of %r" % type(ax).__name__
1464+
"an instance of {typ}".format(typ=type(ax).__name__)
14671465
)
14681466

14691467
if len(ax) == 0:
@@ -1539,7 +1537,7 @@ def _get_time_delta_bins(self, ax):
15391537
if not isinstance(ax, TimedeltaIndex):
15401538
raise TypeError(
15411539
"axis must be a TimedeltaIndex, but got "
1542-
"an instance of %r" % type(ax).__name__
1540+
"an instance of {typ}".format(typ=type(ax).__name__)
15431541
)
15441542

15451543
if not len(ax):
@@ -1564,7 +1562,7 @@ def _get_time_period_bins(self, ax):
15641562
if not isinstance(ax, DatetimeIndex):
15651563
raise TypeError(
15661564
"axis must be a DatetimeIndex, but got "
1567-
"an instance of %r" % type(ax).__name__
1565+
"an instance of {typ}".format(typ=type(ax).__name__)
15681566
)
15691567

15701568
freq = self.freq
@@ -1586,7 +1584,7 @@ def _get_period_bins(self, ax):
15861584
if not isinstance(ax, PeriodIndex):
15871585
raise TypeError(
15881586
"axis must be a PeriodIndex, but got "
1589-
"an instance of %r" % type(ax).__name__
1587+
"an instance of {typ}".format(typ=type(ax).__name__)
15901588
)
15911589

15921590
memb = ax.asfreq(self.freq, how=self.convention)

0 commit comments

Comments
 (0)