Add truncatable repr for DF groupby groups

benjaminarjun · Marco Gorelli · commit 88f15073e643 · 2020-01-28T10:42:33.000Z
Roll back added params to __pprint_dict. All logic now in __repr__ def. Make tests more general. Remove unused line of code. Move truncated dict repr to Index.groupby(). Add correct groups object. A few misc items for the linter. Use pprint_thing in IndexGroupbyGroups. Add whatsnew, docstring, and a couple typo fixes. Update tests to expect pprint formatting. Use new config location. Small update in doc. Add nonsense to AUTHORS.md. Revert "Add nonsense to AUTHORS.md". This reverts commit 9621669.
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -607,7 +607,7 @@ def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool:
         Check if full repr fits in horizontal boundaries imposed by the display
         options width and max_columns.
 
-        In case off non-interactive session, no boundaries apply.
+        In case of non-interactive session, no boundaries apply.
 
         `ignore_width` is here so ipnb+HTML output can behave the way
         users expect. display.max_columns remains in effect.
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -56,6 +56,7 @@ class providing the base-class of operations.
 from pandas.core.arrays import Categorical, DatetimeArray, try_cast_to_ea
 from pandas.core.base import DataError, PandasObject, SelectionMixin
 import pandas.core.common as com
+from pandas.core.config import option_context
 from pandas.core.frame import DataFrame
 from pandas.core.generic import NDFrame
 from pandas.core.groupby import base, ops
@@ -72,7 +73,7 @@ class providing the base-class of operations.
 
 _apply_docs = dict(
     template="""
-    Apply function `func`  group-wise and combine the results together.
+    Apply function `func` group-wise and combine the results together.
 
     The function passed to `apply` must take a {input} as its first
     argument and return a DataFrame, Series or scalar. `apply` will
@@ -2567,3 +2568,25 @@ def get_groupby(
         observed=observed,
         mutated=mutated,
     )
+
+
+class DataFrameGroups(dict):
+    """Dict of groups whose repr shows at most ``display.max_rows`` items."""
+
+    def __repr__(self):
+        from pandas.core.config import get_option  # keep the name in scope
+
+        # A falsy option value (None or 0) disables truncation.
+        nitems = get_option('display.max_rows') or len(self)
+        pairs = ["{key}: {val}".format(key=k, val=v) for k, v in self.items()]
+
+        if nitems >= len(pairs):
+            return "{" + ", ".join(pairs) + "}"
+
+        # Keep the larger half at the head, the smaller half at the tail.
+        end_cnt = int(nitems / 2)
+        head = pairs[:nitems - end_cnt]
+        # Slice from the front: ``pairs[-0:]`` would return every pair
+        # when nitems == 1 leaves no room for tail items.
+        tail = pairs[len(pairs) - end_cnt:]
+        return "{" + ", ".join(head) + ", ... , " + ", ".join(tail) + "}"
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -246,6 +246,7 @@ def size(self) -> Series:
     @cache_readonly
     def groups(self):
         """ dict {group name -> group labels} """
+
         if len(self.groupings) == 1:
             return self.groupings[0].groups
         else:
@@ -350,7 +351,7 @@ def get_group_levels(self):
 
     def _is_builtin_func(self, arg):
         """
-        if we define an builtin function for this argument, return it,
+        if we define a builtin function for this argument, return it,
         otherwise return the arg
         """
         return SelectionMixin._builtin_table.get(arg, arg)
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -68,6 +68,7 @@
 from pandas.core.base import IndexOpsMixin, PandasObject
 import pandas.core.common as com
 from pandas.core.indexers import maybe_convert_indices
+from pandas.core.config import get_option
 from pandas.core.indexes.frozen import FrozenList
 import pandas.core.missing as missing
 from pandas.core.ops import get_op_result_name
@@ -4790,7 +4791,7 @@ def groupby(self, values) -> Dict[Hashable, np.ndarray]:
         # map to the label
         result = {k: self.take(v) for k, v in result.items()}
 
-        return result
+        return IndexGroupbyGroups(result)
 
     def map(self, mapper, na_action=None):
         """
@@ -5501,6 +5502,14 @@ def shape(self):
 Index._add_comparison_methods()
 
 
+class IndexGroupbyGroups(dict):
+    """Dict extension whose __repr__ is abbreviated by display.max_rows."""
+    def __repr__(self):
+        # Import locally: names bound in a class body are not visible here.
+        from pandas.io.formats.printing import pprint_thing
+        return pprint_thing(self, max_seq_items=get_option('display.max_rows'))
+
+
 def ensure_index_from_sequences(sequences, names=None):
     """
     Construct an index from sequences of data.
diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py
@@ -98,7 +98,7 @@ def _pprint_seq(
 ) -> str:
     """
     internal. pprinter for iterables. you should probably use pprint_thing()
-    rather then calling this directly.
+    rather than calling this directly.
 
     bounds length of printed sequence, depending on options
     """
@@ -133,7 +133,7 @@ def _pprint_dict(
 ) -> str:
     """
     internal. pprinter for iterables. you should probably use pprint_thing()
-    rather then calling this directly.
+    rather than calling this directly.
     """
     fmt = "{{{things}}}"
     pairs = []
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
@@ -2128,6 +2128,22 @@ def test_period(self):
         assert str(df) == exp
 
 
+class TestDataFrameGroupByFormatting:
+    def test_groups_repr_truncates(self):
+        """repr of ``.groups`` is elided only past ``display.max_rows``."""
+        frame = pd.DataFrame({'a': [1, 1, 1, 2, 2, 3], 'b': [1, 2, 3, 4, 5, 6]})
+
+        # Limit below the three distinct keys: the repr ends in an ellipsis.
+        with option_context('display.max_rows', 2):
+            truncated = repr(frame.groupby('a').groups)
+        assert truncated.endswith('...}')
+
+        # Limit above the three distinct keys: no trailing ellipsis.
+        with option_context('display.max_rows', 5):
+            complete = repr(frame.groupby('a').groups)
+        assert not complete.endswith('...}')
+
+
 def gen_series_formatting():
     s1 = pd.Series(["a"] * 100)
     s2 = pd.Series(["ab"] * 100)