Fix merge conflicts; replace DataFrameGroups and IndexGroupbyGroups with a single PrettyDict in pandas/io/formats/printing.py, per previous review

Marco Gorelli · Marco Gorelli · commit 7064f09ea387 · 2020-01-28T14:22:21.000Z
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -54,7 +54,7 @@ Other API changes
 
 - :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last``
   will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`)
--
+- The dict returned by :attr:`GroupBy.groups` now has an abbreviated ``repr`` when the number of groups exceeds ``display.max_rows`` (:issue:`1135`)
 
 Backwards incompatible API changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -56,7 +56,6 @@ class providing the base-class of operations.
 from pandas.core.arrays import Categorical, DatetimeArray, try_cast_to_ea
 from pandas.core.base import DataError, PandasObject, SelectionMixin
 import pandas.core.common as com
-from pandas.core.config import option_context
 from pandas.core.frame import DataFrame
 from pandas.core.generic import NDFrame
 from pandas.core.groupby import base, ops
@@ -2568,25 +2567,3 @@ def get_groupby(
         observed=observed,
         mutated=mutated,
     )
-
-
-class DataFrameGroups(dict):
-    def __repr__(self):
-        from pandas.compat import u
-
-        nitems = get_option('display.max_rows') or len(self)
-
-        fmt = u("{{{things}}}")
-        pfmt = u("{key}: {val}")
-
-        pairs = []
-        for k, v in list(self.items()):
-            pairs.append(pfmt.format(key=k, val=v))
-
-        if nitems < len(self):
-            start_cnt, end_cnt = nitems - int(nitems / 2), int(nitems / 2)
-            return fmt.format(things=", ".join(pairs[:start_cnt]) +
-                                     ", ... , " +
-                                     ", ".join(pairs[-end_cnt:]))
-        else:
-            return fmt.format(things=", ".join(pairs))
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -246,7 +246,6 @@ def size(self) -> Series:
     @cache_readonly
     def groups(self):
         """ dict {group name -> group labels} """
-
         if len(self.groupings) == 1:
             return self.groupings[0].groups
         else:
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -68,14 +68,14 @@
 from pandas.core.base import IndexOpsMixin, PandasObject
 import pandas.core.common as com
 from pandas.core.indexers import maybe_convert_indices
-from pandas.core.config import get_option
 from pandas.core.indexes.frozen import FrozenList
 import pandas.core.missing as missing
 from pandas.core.ops import get_op_result_name
 from pandas.core.ops.invalid import make_invalid_op
 from pandas.core.strings import StringMethods
 
 from pandas.io.formats.printing import (
+    PrettyDict,
     default_pprint,
     format_object_attrs,
     format_object_summary,
@@ -4791,7 +4791,7 @@ def groupby(self, values) -> Dict[Hashable, np.ndarray]:
         # map to the label
         result = {k: self.take(v) for k, v in result.items()}
 
-        return IndexGroupbyGroups(result)
+        return PrettyDict(result)
 
     def map(self, mapper, na_action=None):
         """
@@ -5502,14 +5502,6 @@ def shape(self):
 Index._add_comparison_methods()
 
 
-class IndexGroupbyGroups(dict):
-    """Dict extension to support abbreviated __repr__"""
-    from pandas.io.formats.printing import pprint_thing
-
-    def __repr__(self):
-        return pprint_thing(self, max_seq_items=get_option('display.max_rows'))
-
-
 def ensure_index_from_sequences(sequences, names=None):
     """
     Construct an index from sequences of data.
diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py
@@ -528,3 +528,10 @@ def format_object_attrs(
     if len(obj) > max_seq_items:
         attrs.append(("length", len(obj)))
     return attrs
+
+
+class PrettyDict(dict):
+    """Dict extension to support abbreviated __repr__"""
+
+    def __repr__(self):
+        return pprint_thing(self, max_seq_items=get_option("display.max_rows"))
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -2037,3 +2037,16 @@ def test_groupby_list_level():
     expected = pd.DataFrame(np.arange(0, 9).reshape(3, 3))
     result = expected.groupby(level=[0]).mean()
     tm.assert_frame_equal(result, expected)
+
+
+def test_groups_repr_truncates():
+    # GH 1135
+    df = pd.DataFrame({"a": [1, 1, 1, 2, 2, 3], "b": [1, 2, 3, 4, 5, 6]})
+
+    with pd.option_context("display.max_rows", 2):
+        x = df.groupby("a").groups
+        assert x.__repr__().endswith("...}")
+
+    with pd.option_context("display.max_rows", 5):
+        x = df.groupby(np.array(df.a)).groups
+        assert not x.__repr__().endswith("...}")
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
@@ -2128,22 +2128,6 @@ def test_period(self):
         assert str(df) == exp
 
 
-class TestDataFrameGroupByFormatting(object):
-    def test_groups_repr_truncates(self):
-        df = pd.DataFrame({
-            'a': [1, 1, 1, 2, 2, 3],
-            'b': [1, 2, 3, 4, 5, 6]
-        })
-
-        with option_context('display.max_rows', 2):
-            x = df.groupby('a').groups
-            assert x.__repr__().endswith('...}')
-
-        with option_context('display.max_rows', 5):
-            x = df.groupby('a').groups
-            assert not x.__repr__().endswith('...}')
-
-
 def gen_series_formatting():
     s1 = pd.Series(["a"] * 100)
     s2 = pd.Series(["ab"] * 100)