
Commit addfd7a

ENH: truncate output of Groupby.groups (#31388)
1 parent 78d8891 commit addfd7a
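
For context, the user-visible effect of the change, sketched from the new test added in pandas/tests/groupby/test_groupby.py below (the truncation threshold is the existing display.max_seq_items option; the printed output in the comment is illustrative):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(5, 1))
    df["a"] = df.index

    # GroupBy.groups maps each group key to its index labels; its repr now goes
    # through pprint_thing, so large mappings are truncated instead of printed in full.
    with pd.option_context("display.max_seq_items", 4):
        print(df.groupby("a").groups)
    # {0: [0], 1: [1], 2: [2], 3: [3], ...}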

8 files changed: +40 -8 lines changed

doc/source/development/contributing.rst

+1 -1

@@ -32,7 +32,7 @@ check each issue individually, and it's not possible to find the unassigned ones
 
 For this reason, we implemented a workaround consisting of adding a comment with the exact
 text `take`. When you do it, a GitHub action will automatically assign you the issue
-(this will take seconds, and may require refreshint the page to see it).
+(this will take seconds, and may require refreshing the page to see it).
 By doing this, it's possible to filter the list of issues and find only the unassigned ones.
 
 So, a good way to find an issue to start contributing to pandas is to check the list of

doc/source/whatsnew/v1.1.0.rst

+1 -1

@@ -54,7 +54,7 @@ Other API changes
 
 - :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last``
   will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`)
--
+- :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`)
 
 Backwards incompatible API changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

pandas/core/frame.py

+1 -1

@@ -607,7 +607,7 @@ def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool:
         Check if full repr fits in horizontal boundaries imposed by the display
         options width and max_columns.
 
-        In case off non-interactive session, no boundaries apply.
+        In case of non-interactive session, no boundaries apply.
 
         `ignore_width` is here so ipnb+HTML output can behave the way
         users expect. display.max_columns remains in effect.

pandas/core/groupby/groupby.py

+1 -1

@@ -72,7 +72,7 @@ class providing the base-class of operations.
 
 _apply_docs = dict(
     template="""
-    Apply function `func` group-wise and combine the results together.
+    Apply function `func` group-wise and combine the results together.
 
     The function passed to `apply` must take a {input} as its first
     argument and return a DataFrame, Series or scalar. `apply` will

pandas/core/groupby/ops.py

+1 -1

@@ -350,7 +350,7 @@ def get_group_levels(self):
 
     def _is_builtin_func(self, arg):
         """
-        if we define an builtin function for this argument, return it,
+        if we define a builtin function for this argument, return it,
         otherwise return the arg
         """
         return SelectionMixin._builtin_table.get(arg, arg)

pandas/core/indexes/base.py

+4 -3

@@ -1,7 +1,7 @@
 from datetime import datetime
 import operator
 from textwrap import dedent
-from typing import Any, Dict, FrozenSet, Hashable, Optional, Union
+from typing import Any, FrozenSet, Hashable, Optional, Union
 import warnings
 
 import numpy as np
@@ -76,6 +76,7 @@
 from pandas.core.strings import StringMethods
 
 from pandas.io.formats.printing import (
+    PrettyDict,
     default_pprint,
     format_object_attrs,
     format_object_summary,
@@ -4783,7 +4784,7 @@ def _maybe_promote(self, other):
                 return self.astype("object"), other.astype("object")
         return self, other
 
-    def groupby(self, values) -> Dict[Hashable, np.ndarray]:
+    def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]:
         """
         Group the index labels by a given array of values.
 
@@ -4808,7 +4809,7 @@ def groupby(self, values) -> Dict[Hashable, np.ndarray]:
         # map to the label
         result = {k: self.take(v) for k, v in result.items()}
 
-        return result
+        return PrettyDict(result)
 
     def map(self, mapper, na_action=None):
         """

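Apart from the repr, the return value of Index.groupby behaves as before: PrettyDict subclasses dict, so key lookup and iteration are unchanged. A minimal sketch (the index and grouping values are made up for illustration, and the printed forms are approximate):

    import numpy as np
    import pandas as pd

    idx = pd.Index(["a", "b", "c", "d"])
    groups = idx.groupby(np.array([1, 1, 2, 2]))

    # still an ordinary mapping of group value -> labels taken from the index
    assert isinstance(groups, dict)
    print(groups[1])  # Index(['a', 'b'], dtype='object')
    print(groups)     # repr is now routed through pprint_thing and can be abbreviated
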
pandas/io/formats/printing.py

+11
@@ -6,12 +6,14 @@
 from typing import (
     Any,
     Callable,
+    Dict,
     Iterable,
     List,
     Mapping,
     Optional,
     Sequence,
     Tuple,
+    TypeVar,
     Union,
 )
 
@@ -20,6 +22,8 @@
 from pandas.core.dtypes.inference import is_sequence
 
 EscapeChars = Union[Mapping[str, str], Iterable[str]]
+_KT = TypeVar("_KT")
+_VT = TypeVar("_VT")
 
 
 def adjoin(space: int, *lists: List[str], **kwargs) -> str:
@@ -528,3 +532,10 @@ def format_object_attrs(
     if len(obj) > max_seq_items:
         attrs.append(("length", len(obj)))
     return attrs
+
+
+class PrettyDict(Dict[_KT, _VT]):
+    """Dict extension to support abbreviated __repr__"""
+
+    def __repr__(self) -> str:
+        return pprint_thing(self)
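
PrettyDict adds no formatting logic of its own: __repr__ defers to the existing pprint_thing helper, which already honours display.max_seq_items for mappings, and subclassing Dict[_KT, _VT] keeps the class usable in parameterized annotations such as PrettyDict[Hashable, np.ndarray] in pandas/core/indexes/base.py. A small sketch of the resulting behaviour (output shown in comments is illustrative):

    import pandas as pd
    from pandas.io.formats.printing import PrettyDict, pprint_thing

    d = PrettyDict({i: [i] for i in range(10)})

    with pd.option_context("display.max_seq_items", 3):
        # repr(d) is truncated because it goes through pprint_thing
        print(repr(d))                # {0: [0], 1: [1], 2: [2], ...}
        # a built-in dict is not affected; pprint_thing must be called explicitly
        print(pprint_thing(dict(d)))  # same truncated string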

pandas/tests/groupby/test_groupby.py

+20
@@ -2037,3 +2037,23 @@ def test_groupby_list_level():
     expected = pd.DataFrame(np.arange(0, 9).reshape(3, 3))
     result = expected.groupby(level=[0]).mean()
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "max_seq_items, expected",
+    [
+        (5, "{0: [0], 1: [1], 2: [2], 3: [3], 4: [4]}"),
+        (4, "{0: [0], 1: [1], 2: [2], 3: [3], ...}"),
+    ],
+)
+def test_groups_repr_truncates(max_seq_items, expected):
+    # GH 1135
+    df = pd.DataFrame(np.random.randn(5, 1))
+    df["a"] = df.index
+
+    with pd.option_context("display.max_seq_items", max_seq_items):
+        result = df.groupby("a").groups.__repr__()
+        assert result == expected
+
+        result = df.groupby(np.array(df.a)).groups.__repr__()
+        assert result == expected
