Skip to content

Commit 024f25b

Browse files
committed
Partial progress
1 parent ee60245 commit 024f25b

File tree

2 files changed

+8
-9
lines changed

2 files changed

+8
-9
lines changed

sdks/python/apache_beam/dataframe/frames.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -380,14 +380,12 @@ def groupby(self, by, level, axis, as_index, group_keys, **kwargs):
380380
"""
381381
if not as_index:
382382
raise NotImplementedError('groupby(as_index=False)')
383-
if not group_keys:
384-
raise NotImplementedError('groupby(group_keys=False)')
385383

386384
if axis in (1, 'columns'):
387385
return _DeferredGroupByCols(
388386
expressions.ComputedExpression(
389387
'groupbycols',
390-
lambda df: df.groupby(by, axis=axis, **kwargs), [self._expr],
388+
lambda df: df.groupby(by, axis=axis, group_keys=group_keys, **kwargs), [self._expr],
391389
requires_partition_by=partitionings.Arbitrary(),
392390
preserves_partition_by=partitionings.Arbitrary()))
393391

@@ -559,7 +557,7 @@ def prepend_index(df, by): # type: ignore
559557
expressions.ComputedExpression(
560558
'groupbyindex',
561559
lambda df: df.groupby(
562-
level=list(range(df.index.nlevels)), **kwargs), [to_group],
560+
level=list(range(df.index.nlevels)), group_keys=group_keys, **kwargs), [to_group],
563561
requires_partition_by=partitionings.Index(),
564562
preserves_partition_by=partitionings.Arbitrary()),
565563
kwargs,

sdks/python/apache_beam/dataframe/pandas_doctests_test.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -127,10 +127,7 @@ def test_ndframe_tests(self):
127127
'pandas.core.generic.NDFrame.copy': ['*'],
128128
'pandas.core.generic.NDFrame.droplevel': ['*'],
129129
'pandas.core.generic.NDFrame.get': ['*'],
130-
'pandas.core.generic.NDFrame.rank': [
131-
# Modified dataframe
132-
'df'
133-
],
130+
'pandas.core.generic.NDFrame.rank': ['*'],
134131
'pandas.core.generic.NDFrame.rename': [
135132
# Seems to be an upstream bug. The actual error has a different
136133
# message:
@@ -704,8 +701,11 @@ def test_groupby_tests(self):
704701
result = doctests.testmod(
705702
pd.core.groupby.groupby,
706703
use_beam=False,
704+
verbose=True,
707705
wont_implement_ok={
706+
'pandas.core.groupby.groupby.GroupBy.first': ['*'],
708707
'pandas.core.groupby.groupby.GroupBy.head': ['*'],
708+
'pandas.core.groupby.groupby.GroupBy.last': ['*'],
709709
'pandas.core.groupby.groupby.GroupBy.tail': ['*'],
710710
'pandas.core.groupby.groupby.GroupBy.nth': ['*'],
711711
'pandas.core.groupby.groupby.GroupBy.cumcount': ['*'],
@@ -831,6 +831,7 @@ def test_top_level(self):
831831
'crosstab': ['*'],
832832
'cut': ['*'],
833833
'eval': ['*'],
834+
'from_dummies': ['*'],
834835
'get_dummies': ['*'],
835836
'infer_freq': ['*'],
836837
'lreshape': ['*'],
@@ -863,7 +864,7 @@ def test_top_level(self):
863864
},
864865
skip={
865866
# error formatting
866-
'concat': ['pd.concat([df5, df6], verify_integrity=True)'],
867+
'concat': ['pd.concat([df5, df6], verify_integrity=True)', 'pd.concat([df7, new_row.to_frame().T], ignore_index=True)'],
867868
# doctest DeprecationWarning
868869
'melt': ['df'],
869870
# Order-sensitive re-indexing.

0 commit comments

Comments
 (0)