Skip to content

Feature/groupby repr ellipses 1135 #24853

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
f44e671
Add truncatable repr for DF groupby groups
benjaminarjun Jan 21, 2019
19bb9bf
Merge branch 'master' into feature/groupby-repr-ellipses-1135
benjaminarjun Jan 21, 2019
d6b310a
Roll back added params to __pprint_dict. All logic now in __repr__ de…
benjaminarjun Jan 21, 2019
43dbc6b
Remove unused line of code
benjaminarjun Jan 21, 2019
49f1def
Merge branch 'master' into feature/groupby-repr-ellipses-1135
benjaminarjun Jan 23, 2019
85d3012
Merge branch 'master' into feature/groupby-repr-ellipses-1135
benjaminarjun Feb 6, 2019
0746c3b
Temporarily disabling failing test
benjaminarjun Feb 6, 2019
6a7d7df
Merge branch 'master' into feature/groupby-repr-ellipses-1135
benjaminarjun Feb 27, 2019
3d4b057
Merge branch 'master' into feature/groupby-repr-ellipses-1135
benjaminarjun Mar 5, 2019
33142cb
Move truncated dict repr to Index.groupby()
benjaminarjun Mar 6, 2019
dbb7d12
Merge branch 'master' into feature/groupby-repr-ellipses-1135
benjaminarjun Mar 6, 2019
5db6c07
Add correct groups object
benjaminarjun Mar 6, 2019
8f30d07
A few misc items for the linter
benjaminarjun Mar 7, 2019
2870163
Merge branch 'master' into feature/groupby-repr-ellipses-1135
benjaminarjun Mar 7, 2019
acfa005
Merge branch 'master' into feature/groupby-repr-ellipses-1135
benjaminarjun Mar 15, 2019
b60329c
Use pprint_thing in IndexGroupByGroups. Add whatsnew, docstring, and …
benjaminarjun Mar 15, 2019
13b73a6
Merge branch 'master' into feature/groupby-repr-ellipses-1135
benjaminarjun Mar 29, 2019
29c6263
Update tests to expect pprint formatting. Use new config location. Sm…
benjaminarjun Mar 30, 2019
ccb98a3
Merge branch 'master' into feature/groupby-repr-ellipses-1135
benjaminarjun Mar 30, 2019
c74cbba
Accept isort formatting preference
benjaminarjun Mar 30, 2019
cdb9ebc
Merge branch 'master' into feature/groupby-repr-ellipses-1135
benjaminarjun Apr 10, 2019
9621669
Add nonsense to AUTHORS.md
benjaminarjun Apr 10, 2019
38ecd1a
Revert "Add nonsense to AUTHORS.md"
benjaminarjun Apr 10, 2019
9742473
Merge branch 'master' into feature/groupby-repr-ellipses-1135
benjaminarjun Apr 28, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ def _repr_fits_horizontal_(self, ignore_width=False):
Check if full repr fits in horizontal boundaries imposed by the display
options width and max_columns.

In case off non-interactive session, no boundaries apply.
In case of non-interactive session, no boundaries apply.

`ignore_width` is here so ipnb+HTML output can behave the way
users expect. display.max_columns remains in effect.
Expand Down
11 changes: 9 additions & 2 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class providing the base-class of operations.
from pandas.core.base import (
DataError, GroupByError, PandasObject, SelectionMixin, SpecificationError)
import pandas.core.common as com
from pandas.core.config import option_context
from pandas.core.config import get_option, option_context
from pandas.core.frame import DataFrame
from pandas.core.generic import NDFrame
from pandas.core.groupby import base
Expand Down Expand Up @@ -387,7 +387,7 @@ def groups(self):
Dict {group name -> group labels}.
"""
self._assure_grouper()
return self.grouper.groups
return DataFrameGroups(self.grouper.groups)

@property
def ngroups(self):
Expand Down Expand Up @@ -2108,3 +2108,10 @@ def groupby(obj, by, **kwds):
raise TypeError('invalid type: {}'.format(obj))

return klass(obj, by, **kwds)


class DataFrameGroups(dict):
    """
    ``dict`` subclass that only customises how the mapping is printed.

    All mapping behaviour is inherited from ``dict``; ``__repr__`` is the
    single override and hands the formatting off to ``_pprint_dict``.
    """

    def __repr__(self):
        """
        Return the string produced by ``_pprint_dict`` for this mapping.

        The ``display.max_rows`` option is forwarded as ``max_seq_items``,
        together with ``recurse=False`` and ``truncate_at='middle'``.
        """
        # Function-scope import, as in the original.
        # NOTE(review): presumably this avoids an import cycle with the
        # formatting module -- confirm before hoisting to module level.
        from pandas.io.formats.printing import _pprint_dict

        item_cap = get_option('display.max_rows')
        return _pprint_dict(
            self,
            max_seq_items=item_cap,
            recurse=False,
            truncate_at='middle',
        )
34 changes: 23 additions & 11 deletions pandas/io/formats/printing.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def _join_unicode(lines, sep=''):
def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds):
"""
internal. pprinter for iterables. you should probably use pprint_thing()
rather then calling this directly.
rather than calling this directly.

bounds length of printed sequence, depending on options
"""
Expand Down Expand Up @@ -124,11 +124,13 @@ def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds):
return fmt.format(body=body)


def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds):
def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, recurse=True,
truncate_at='end', **kwds):
"""
internal. pprinter for iterables. you should probably use pprint_thing()
rather then calling this directly.
rather than calling this directly.
"""

fmt = u("{{{things}}}")
pairs = []

Expand All @@ -139,16 +141,26 @@ def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds):
else:
nitems = max_seq_items or get_option("max_seq_items") or len(seq)

for k, v in list(seq.items())[:nitems]:
pairs.append(
pfmt.format(
key=pprint_thing(k, _nest_lvl + 1,
max_seq_items=max_seq_items, **kwds),
val=pprint_thing(v, _nest_lvl + 1,
max_seq_items=max_seq_items, **kwds)))
if recurse:
for k, v in list(seq.items())[:nitems]:
pairs.append(
pfmt.format(
key=pprint_thing(k, _nest_lvl + 1,
max_seq_items=max_seq_items, **kwds),
val=pprint_thing(v, _nest_lvl + 1,
max_seq_items=max_seq_items, **kwds)))
else:
for k, v in list(seq.items())[:nitems]:
pairs.append(pfmt.format(key=k, val=v))

if nitems < len(seq):
return fmt.format(things=", ".join(pairs) + ", ...")
if truncate_at == 'middle':
start_cnt, end_cnt = nitems - int(nitems / 2), int(nitems / 2)
return fmt.format(things=", ".join(pairs[:start_cnt]) +
", ... , " +
", ".join(pairs[end_cnt:]))
else:
return fmt.format(things=", ".join(pairs) + ", ...")
else:
return fmt.format(things=", ".join(pairs))

Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1748,6 +1748,22 @@ def test_period(self):
assert str(df) == exp


class TestDataFrameGroupByFormatting(object):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This belongs in pandas/tests/groupby/test_grouping.py, near the other repr tests.

def test_groups_repr_truncates(self):
df = pd.DataFrame({
'a': [1, 1, 1, 2, 2, 3],
'b': [1, 2, 3, 4, 5, 6]
})

with option_context('display.max_rows', 2):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also try a grouper like np.array(df.a), which hits a different code path?

x = df.groupby('a').groups

expected = ("{1: Int64Index([0, 1, 2], dtype='int64'), ... , "
"2: Int64Index([3, 4], dtype='int64')}")

assert x.__repr__() == expected


def gen_series_formatting():
s1 = pd.Series(['a'] * 100)
s2 = pd.Series(['ab'] * 100)
Expand Down