Skip to content

Commit 88f1507

Browse files
benjaminarjun, Marco Gorelli
authored and
Marco Gorelli
committed
Add truncatable repr for DF groupby groups
Roll back added params to __pprint_dict. All logic now in __repr__ def. Make tests more general. Remove unused line of code. Move truncated dict repr to Index.groupby(). Add correct groups object. A few misc items for the linter. Use pprint_thing in IndexGroupbyGroups. Add whatsnew, docstring, and a couple typo fixes. Update tests to expect pprint formatting. Use new config location. Small update in doc. Add nonsense to AUTHORS.md. Revert "Add nonsense to AUTHORS.md". This reverts commit 9621669.
1 parent 4edcc55 commit 88f1507

File tree

6 files changed

+55
-6
lines changed

6 files changed

+55
-6
lines changed

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -607,7 +607,7 @@ def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool:
607607
Check if full repr fits in horizontal boundaries imposed by the display
608608
options width and max_columns.
609609
610-
In case off non-interactive session, no boundaries apply.
610+
In case of non-interactive session, no boundaries apply.
611611
612612
`ignore_width` is here so ipnb+HTML output can behave the way
613613
users expect. display.max_columns remains in effect.

pandas/core/groupby/groupby.py

+24-1
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ class providing the base-class of operations.
5656
from pandas.core.arrays import Categorical, DatetimeArray, try_cast_to_ea
5757
from pandas.core.base import DataError, PandasObject, SelectionMixin
5858
import pandas.core.common as com
59+
from pandas.core.config import option_context
5960
from pandas.core.frame import DataFrame
6061
from pandas.core.generic import NDFrame
6162
from pandas.core.groupby import base, ops
@@ -72,7 +73,7 @@ class providing the base-class of operations.
7273

7374
_apply_docs = dict(
7475
template="""
75-
Apply function `func` group-wise and combine the results together.
76+
Apply function `func` group-wise and combine the results together.
7677
7778
The function passed to `apply` must take a {input} as its first
7879
argument and return a DataFrame, Series or scalar. `apply` will
@@ -2567,3 +2568,25 @@ def get_groupby(
25672568
observed=observed,
25682569
mutated=mutated,
25692570
)
2571+
2572+
2573+
class DataFrameGroups(dict):
    """
    Dict of groupby groups whose ``repr`` is shortened for large mappings.

    At most ``display.max_rows`` ``key: value`` pairs are rendered. When the
    mapping holds more entries than that, the leading and trailing pairs are
    kept and the omitted middle is replaced by ``...``.
    """

    def __repr__(self):
        """
        Return a ``{key: value, ...}`` string limited by ``display.max_rows``.

        Returns
        -------
        str
            The full mapping when it fits within the limit, otherwise the
            first and last pairs separated by an ellipsis.
        """
        # Imported locally so this class does not rely on a module-level
        # ``get_option`` import being present.
        from pandas._config import get_option

        # A falsy option value (None or 0) means "show everything".
        nitems = get_option("display.max_rows") or len(self)

        pairs = ["{key}: {val}".format(key=k, val=v) for k, v in self.items()]

        if nitems >= len(pairs):
            return "{{{things}}}".format(things=", ".join(pairs))

        # Give the extra slot to the head when the limit is odd.
        end_cnt = nitems // 2
        start_cnt = nitems - end_cnt

        things = ", ".join(pairs[:start_cnt])
        if end_cnt:
            things += ", ... , " + ", ".join(pairs[-end_cnt:])
        else:
            # ``pairs[-0:]`` would select every pair, so emit no tail at all.
            things += ", ..."
        return "{{{things}}}".format(things=things)

pandas/core/groupby/ops.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,7 @@ def size(self) -> Series:
246246
@cache_readonly
247247
def groups(self):
248248
""" dict {group name -> group labels} """
249+
249250
if len(self.groupings) == 1:
250251
return self.groupings[0].groups
251252
else:
@@ -350,7 +351,7 @@ def get_group_levels(self):
350351

351352
def _is_builtin_func(self, arg):
    """
    Look ``arg`` up in ``SelectionMixin._builtin_table``.

    Returns the table entry registered for ``arg`` when there is one,
    otherwise returns ``arg`` unchanged.
    """
    builtin_table = SelectionMixin._builtin_table
    return builtin_table.get(arg, arg)

pandas/core/indexes/base.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
from pandas.core.base import IndexOpsMixin, PandasObject
6969
import pandas.core.common as com
7070
from pandas.core.indexers import maybe_convert_indices
71+
from pandas.core.config import get_option
7172
from pandas.core.indexes.frozen import FrozenList
7273
import pandas.core.missing as missing
7374
from pandas.core.ops import get_op_result_name
@@ -4790,7 +4791,7 @@ def groupby(self, values) -> Dict[Hashable, np.ndarray]:
47904791
# map to the label
47914792
result = {k: self.take(v) for k, v in result.items()}
47924793

4793-
return result
4794+
return IndexGroupbyGroups(result)
47944795

47954796
def map(self, mapper, na_action=None):
47964797
"""
@@ -5501,6 +5502,14 @@ def shape(self):
55015502
Index._add_comparison_methods()
55025503

55035504

5505+
class IndexGroupbyGroups(dict):
    """Dict extension to support abbreviated __repr__"""

    def __repr__(self):
        """
        Return the pretty-printed mapping, passing ``display.max_rows`` to
        ``pprint_thing`` as the maximum number of items to show.

        Returns
        -------
        str
        """
        # Imported inside the method: an import placed in the class body only
        # binds a class attribute, which a method cannot resolve as a bare
        # name.
        from pandas.io.formats.printing import pprint_thing

        return pprint_thing(self, max_seq_items=get_option('display.max_rows'))
5511+
5512+
55045513
def ensure_index_from_sequences(sequences, names=None):
55055514
"""
55065515
Construct an index from sequences of data.

pandas/io/formats/printing.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def _pprint_seq(
9898
) -> str:
9999
"""
100100
internal. pprinter for iterables. you should probably use pprint_thing()
101-
rather then calling this directly.
101+
rather than calling this directly.
102102
103103
bounds length of printed sequence, depending on options
104104
"""
@@ -133,7 +133,7 @@ def _pprint_dict(
133133
) -> str:
134134
"""
135135
internal. pprinter for iterables. you should probably use pprint_thing()
136-
rather then calling this directly.
136+
rather than calling this directly.
137137
"""
138138
fmt = "{{{things}}}"
139139
pairs = []

pandas/tests/io/formats/test_format.py

+16
Original file line numberDiff line numberDiff line change
@@ -2128,6 +2128,22 @@ def test_period(self):
21282128
assert str(df) == exp
21292129

21302130

2131+
class TestDataFrameGroupByFormatting:
    """Formatting checks for the ``groups`` mapping of a DataFrame groupby."""

    def test_groups_repr_truncates(self):
        """
        The repr of ``groups`` ends with ``...}`` only when the number of
        groups exceeds ``display.max_rows``.
        """
        frame = pd.DataFrame({
            'a': [1, 1, 1, 2, 2, 3],
            'b': [1, 2, 3, 4, 5, 6],
        })

        # Three groups: a limit of 2 must truncate, a limit of 5 must not.
        cases = ((2, True), (5, False))
        for max_rows, expect_truncated in cases:
            with option_context('display.max_rows', max_rows):
                groups_repr = repr(frame.groupby('a').groups)
                if expect_truncated:
                    assert groups_repr.endswith('...}')
                else:
                    assert not groups_repr.endswith('...}')
2145+
2146+
21312147
def gen_series_formatting():
21322148
s1 = pd.Series(["a"] * 100)
21332149
s2 = pd.Series(["ab"] * 100)

0 commit comments

Comments
 (0)