Skip to content

Commit 580a094

Browse files
WillAyd authored and jreback committed
GroupBy Cleanup (#23971)
1 parent 440469b commit 580a094

File tree

4 files changed

+21
-27
lines changed

4 files changed

+21
-27
lines changed

pandas/core/groupby/generic.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -826,8 +826,9 @@ def _aggregate_multiple_funcs(self, arg, _level):
826826
for name, func in arg:
827827
obj = self
828828
if name in results:
829-
raise SpecificationError('Function names must be unique, '
830-
'found multiple named %s' % name)
829+
raise SpecificationError(
830+
'Function names must be unique, found multiple named '
831+
'{}'.format(name))
831832

832833
# reset the cache so that we
833834
# only include the named selection
@@ -1027,8 +1028,7 @@ def nunique(self, dropna=True):
10271028
try:
10281029
sorter = np.lexsort((val, ids))
10291030
except TypeError: # catches object dtypes
1030-
msg = ('val.dtype must be object, got {dtype}'
1031-
.format(dtype=val.dtype))
1031+
msg = 'val.dtype must be object, got {}'.format(val.dtype)
10321032
assert val.dtype == object, msg
10331033
val, _ = algorithms.factorize(val, sort=False)
10341034
sorter = np.lexsort((val, ids))

pandas/core/groupby/groupby.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1644,7 +1644,8 @@ def nth(self, n, dropna=None):
16441644
# just returns NaN
16451645
raise ValueError("For a DataFrame groupby, dropna must be "
16461646
"either None, 'any' or 'all', "
1647-
"(was passed %s)." % (dropna),)
1647+
"(was passed {dropna}).".format(
1648+
dropna=dropna))
16481649

16491650
# old behaviour, but with all and any support for DataFrames.
16501651
# modified in GH 7559 to have better perf
@@ -2099,6 +2100,6 @@ def groupby(obj, by, **kwds):
20992100
from pandas.core.groupby.generic import DataFrameGroupBy
21002101
klass = DataFrameGroupBy
21012102
else: # pragma: no cover
2102-
raise TypeError('invalid type: %s' % type(obj))
2103+
raise TypeError('invalid type: {}'.format(obj))
21032104

21042105
return klass(obj, by, **kwds)

pandas/core/groupby/grouper.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
257257
if level is not None:
258258
if not isinstance(level, int):
259259
if level not in index.names:
260-
raise AssertionError('Level %s not in index' % str(level))
260+
raise AssertionError('Level {} not in index'.format(level))
261261
level = index.names.index(level)
262262

263263
if self.name is None:
@@ -317,7 +317,8 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
317317
(Series, Index, ExtensionArray, np.ndarray)):
318318
if getattr(self.grouper, 'ndim', 1) != 1:
319319
t = self.name or str(type(self.grouper))
320-
raise ValueError("Grouper for '%s' not 1-dimensional" % t)
320+
raise ValueError(
321+
"Grouper for '{}' not 1-dimensional".format(t))
321322
self.grouper = self.index.map(self.grouper)
322323
if not (hasattr(self.grouper, "__len__") and
323324
len(self.grouper) == len(self.index)):
@@ -460,8 +461,8 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
460461

461462
if isinstance(level, compat.string_types):
462463
if obj.index.name != level:
463-
raise ValueError('level name %s is not the name of the '
464-
'index' % level)
464+
raise ValueError('level name {} is not the name of the '
465+
'index'.format(level))
465466
elif level > 0 or level < -1:
466467
raise ValueError('level > 0 or level < -1 only valid with '
467468
' MultiIndex')

pandas/core/groupby/ops.py

+9-17
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,8 @@ def get_func(fname):
380380

381381
# otherwise find dtype-specific version, falling back to object
382382
for dt in [dtype_str, 'object']:
383-
f = getattr(libgroupby, "%s_%s" % (fname, dtype_str), None)
383+
f = getattr(libgroupby, "{fname}_{dtype_str}".format(
384+
fname=fname, dtype_str=dtype_str), None)
384385
if f is not None:
385386
return f
386387

@@ -403,9 +404,11 @@ def wrapper(*args, **kwargs):
403404
func = get_func(ftype)
404405

405406
if func is None:
406-
raise NotImplementedError("function is not implemented for this"
407-
"dtype: [how->%s,dtype->%s]" %
408-
(how, dtype_str))
407+
raise NotImplementedError(
408+
"function is not implemented for this dtype: "
409+
"[how->{how},dtype->{dtype_str}]".format(how=how,
410+
dtype_str=dtype_str))
411+
409412
return func
410413

411414
def _cython_operation(self, kind, values, how, axis, min_count=-1,
@@ -485,7 +488,8 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1,
485488
out_dtype = 'float'
486489
else:
487490
if is_numeric:
488-
out_dtype = '%s%d' % (values.dtype.kind, values.dtype.itemsize)
491+
out_dtype = '{kind}{itemsize}'.format(
492+
kind=values.dtype.kind, itemsize=values.dtype.itemsize)
489493
else:
490494
out_dtype = 'object'
491495

@@ -739,10 +743,6 @@ def group_info(self):
739743
obs_group_ids.astype('int64', copy=False),
740744
ngroups)
741745

742-
@cache_readonly
743-
def ngroups(self):
744-
return len(self.result_index)
745-
746746
@cache_readonly
747747
def result_index(self):
748748
if len(self.binlabels) != 0 and isna(self.binlabels[0]):
@@ -769,11 +769,6 @@ def agg_series(self, obj, func):
769769
grouper = reduction.SeriesBinGrouper(obj, func, self.bins, dummy)
770770
return grouper.get_result()
771771

772-
# ----------------------------------------------------------------------
773-
# cython aggregation
774-
775-
_cython_functions = copy.deepcopy(BaseGrouper._cython_functions)
776-
777772

778773
def _get_axes(group):
779774
if isinstance(group, Series):
@@ -853,9 +848,6 @@ def _chop(self, sdata, slice_obj):
853848

854849
class FrameSplitter(DataSplitter):
855850

856-
def __init__(self, data, labels, ngroups, axis=0):
857-
super(FrameSplitter, self).__init__(data, labels, ngroups, axis=axis)
858-
859851
def fast_apply(self, f, names):
860852
# must return keys::list, values::list, mutated::bool
861853
try:

0 commit comments

Comments
 (0)