@@ -372,24 +372,24 @@ def last(self, offset):
372
372
@frame_base .args_to_kwargs (pd .DataFrame )
373
373
@frame_base .populate_defaults (pd .DataFrame )
374
374
def groupby (self , by , level , axis , as_index , group_keys , ** kwargs ):
375
- """``as_index`` and ``group_keys`` must both be ``True``.
375
+ """``as_index`` must be ``True``.
376
376
377
377
Aggregations grouping by a categorical column with ``observed=False`` set
378
378
are not currently parallelizable
379
379
(`Issue 21827 <https://github.com/apache/beam/issues/21827>`_).
380
380
"""
381
381
if not as_index :
382
382
raise NotImplementedError ('groupby(as_index=False)' )
383
- if not group_keys :
384
- raise NotImplementedError ('groupby(group_keys=False)' )
385
383
386
384
if axis in (1 , 'columns' ):
387
385
return _DeferredGroupByCols (
388
386
expressions .ComputedExpression (
389
387
'groupbycols' ,
390
- lambda df : df .groupby (by , axis = axis , ** kwargs ), [self ._expr ],
388
+ lambda df : df .groupby (
389
+ by , axis = axis , group_keys = group_keys , ** kwargs ), [self ._expr ],
391
390
requires_partition_by = partitionings .Arbitrary (),
392
- preserves_partition_by = partitionings .Arbitrary ()))
391
+ preserves_partition_by = partitionings .Arbitrary ()),
392
+ group_keys = group_keys )
393
393
394
394
if level is None and by is None :
395
395
raise TypeError ("You have to supply one of 'by' and 'level'" )
@@ -559,14 +559,17 @@ def prepend_index(df, by): # type: ignore
559
559
expressions .ComputedExpression (
560
560
'groupbyindex' ,
561
561
lambda df : df .groupby (
562
- level = list (range (df .index .nlevels )), ** kwargs ), [to_group ],
562
+ level = list (range (df .index .nlevels )),
563
+ group_keys = group_keys ,
564
+ ** kwargs ), [to_group ],
563
565
requires_partition_by = partitionings .Index (),
564
566
preserves_partition_by = partitionings .Arbitrary ()),
565
567
kwargs ,
566
568
to_group ,
567
569
to_group_with_index ,
568
570
grouping_columns = grouping_columns ,
569
- grouping_indexes = grouping_indexes )
571
+ grouping_indexes = grouping_indexes ,
572
+ group_keys = group_keys )
570
573
571
574
@property # type: ignore
572
575
@frame_base .with_docs_from (pd .DataFrame )
@@ -676,6 +679,7 @@ def replace(self, to_replace, value, limit, method, **kwargs):
676
679
order-sensitive. It cannot be specified.
677
680
678
681
If ``limit`` is specified this operation is not parallelizable."""
682
+ # pylint: disable-next=c-extension-no-member
679
683
value_compare = None if PD_VERSION < (1 , 4 ) else lib .no_default
680
684
if method is not None and not isinstance (to_replace ,
681
685
dict ) and value is value_compare :
@@ -4123,6 +4127,7 @@ def __init__(self, expr, kwargs,
4123
4127
ungrouped_with_index : expressions .Expression [pd .core .generic .NDFrame ], # pylint: disable=line-too-long
4124
4128
grouping_columns ,
4125
4129
grouping_indexes ,
4130
+ group_keys ,
4126
4131
projection = None ):
4127
4132
"""This object represents the result of::
4128
4133
@@ -4149,6 +4154,7 @@ def __init__(self, expr, kwargs,
4149
4154
self ._projection = projection
4150
4155
self ._grouping_columns = grouping_columns
4151
4156
self ._grouping_indexes = grouping_indexes
4157
+ self ._group_keys = group_keys
4152
4158
self ._kwargs = kwargs
4153
4159
4154
4160
if (self ._kwargs .get ('dropna' , True ) is False and
@@ -4170,6 +4176,7 @@ def __getattr__(self, name):
4170
4176
self ._ungrouped_with_index ,
4171
4177
self ._grouping_columns ,
4172
4178
self ._grouping_indexes ,
4179
+ self ._group_keys ,
4173
4180
projection = name )
4174
4181
4175
4182
def __getitem__ (self , name ):
@@ -4184,6 +4191,7 @@ def __getitem__(self, name):
4184
4191
self ._ungrouped_with_index ,
4185
4192
self ._grouping_columns ,
4186
4193
self ._grouping_indexes ,
4194
+ self ._group_keys ,
4187
4195
projection = name )
4188
4196
4189
4197
@frame_base .with_docs_from (DataFrameGroupBy )
@@ -4233,6 +4241,7 @@ def apply(self, func, *args, **kwargs):
4233
4241
project = _maybe_project_func (self ._projection )
4234
4242
grouping_indexes = self ._grouping_indexes
4235
4243
grouping_columns = self ._grouping_columns
4244
+ group_keys = self ._group_keys
4236
4245
4237
4246
# Unfortunately pandas does not execute func to determine the right proxy.
4238
4247
# We run user func on a proxy here to detect the return type and generate
@@ -4321,7 +4330,8 @@ def do_partition_apply(df):
4321
4330
df = df .reset_index (grouping_columns , drop = True )
4322
4331
4323
4332
gb = df .groupby (level = grouping_indexes or None ,
4324
- by = grouping_columns or None )
4333
+ by = grouping_columns or None ,
4334
+ group_keys = group_keys )
4325
4335
4326
4336
gb = project (gb )
4327
4337
@@ -4361,6 +4371,7 @@ def fn_wrapper(x, *args, **kwargs):
4361
4371
fn_wrapper = fn
4362
4372
4363
4373
project = _maybe_project_func (self ._projection )
4374
+ group_keys = self ._group_keys
4364
4375
4365
4376
# pandas cannot execute fn to determine the right proxy.
4366
4377
# We run user fn on a proxy here to detect the return type and generate the
@@ -4387,10 +4398,12 @@ def fn_wrapper(x, *args, **kwargs):
4387
4398
return DeferredDataFrame (
4388
4399
expressions .ComputedExpression (
4389
4400
'transform' ,
4390
- lambda df : project (df .groupby (level = levels )).transform (
4391
- fn_wrapper ,
4392
- * args ,
4393
- ** kwargs ).droplevel (self ._grouping_columns ),
4401
+ lambda df : project (
4402
+ df .groupby (level = levels , group_keys = group_keys )
4403
+ ).transform (
4404
+ fn_wrapper ,
4405
+ * args ,
4406
+ ** kwargs ).droplevel (self ._grouping_columns ),
4394
4407
[self ._ungrouped_with_index ],
4395
4408
proxy = proxy ,
4396
4409
requires_partition_by = partitionings .Index (levels ),
@@ -4551,6 +4564,7 @@ def wrapper(self, *args, **kwargs):
4551
4564
is_categorical_grouping = any (to_group .get_level_values (i ).is_categorical ()
4552
4565
for i in self ._grouping_indexes )
4553
4566
groupby_kwargs = self ._kwargs
4567
+ group_keys = self ._group_keys
4554
4568
4555
4569
# Don't include un-observed categorical values in the preagg
4556
4570
preagg_groupby_kwargs = groupby_kwargs .copy ()
@@ -4562,6 +4576,7 @@ def wrapper(self, *args, **kwargs):
4562
4576
lambda df : getattr (
4563
4577
project (
4564
4578
df .groupby (level = list (range (df .index .nlevels )),
4579
+ group_keys = group_keys ,
4565
4580
** preagg_groupby_kwargs )
4566
4581
),
4567
4582
agg_name )(** kwargs ),
@@ -4574,6 +4589,7 @@ def wrapper(self, *args, **kwargs):
4574
4589
'post_combine_' + post_agg_name ,
4575
4590
lambda df : getattr (
4576
4591
df .groupby (level = list (range (df .index .nlevels )),
4592
+ group_keys = group_keys ,
4577
4593
** groupby_kwargs ),
4578
4594
post_agg_name )(** kwargs ),
4579
4595
[pre_agg ],
@@ -4597,6 +4613,7 @@ def wrapper(self, *args, **kwargs):
4597
4613
assert isinstance (self , DeferredGroupBy )
4598
4614
4599
4615
to_group = self ._ungrouped .proxy ().index
4616
+ group_keys = self ._group_keys
4600
4617
is_categorical_grouping = any (to_group .get_level_values (i ).is_categorical ()
4601
4618
for i in self ._grouping_indexes )
4602
4619
@@ -4606,6 +4623,7 @@ def wrapper(self, *args, **kwargs):
4606
4623
agg_name ,
4607
4624
lambda df : getattr (project (
4608
4625
df .groupby (level = list (range (df .index .nlevels )),
4626
+ group_keys = group_keys ,
4609
4627
** groupby_kwargs ),
4610
4628
), agg_name )(** kwargs ),
4611
4629
[self ._ungrouped ],
0 commit comments