
ENH: truncate output of Groupby.groups #31388

Merged
2 changes: 1 addition & 1 deletion doc/source/development/contributing.rst
@@ -32,7 +32,7 @@ check each issue individually, and it's not possible to find the unassigned ones

For this reason, we implemented a workaround consisting of adding a comment with the exact
text `take`. When you do it, a GitHub action will automatically assign you the issue
-(this will take seconds, and may require refreshint the page to see it).
+(this will take seconds, and may require refreshing the page to see it).
By doing this, it's possible to filter the list of issues and find only the unassigned ones.

So, a good way to find an issue to start contributing to pandas is to check the list of
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.0.rst
@@ -54,7 +54,7 @@ Other API changes

- :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last``
will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`)
--
+- :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`)

Backwards incompatible API changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
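
A minimal sketch of the behaviour this whatsnew entry describes, assuming a pandas build that includes this change; the output in the trailing comment is indicative only:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": np.arange(10), "b": np.random.randn(10)})

    # With display.max_seq_items lowered, the .groups mapping is abbreviated
    # with "..." instead of printing every group key.
    with pd.option_context("display.max_seq_items", 4):
        print(df.groupby("a").groups)
    # e.g. {0: [0], 1: [1], 2: [2], 3: [3], ...}
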
2 changes: 1 addition & 1 deletion pandas/core/frame.py
@@ -607,7 +607,7 @@ def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool:
Check if full repr fits in horizontal boundaries imposed by the display
options width and max_columns.

-In case off non-interactive session, no boundaries apply.
+In case of non-interactive session, no boundaries apply.

`ignore_width` is here so ipnb+HTML output can behave the way
users expect. display.max_columns remains in effect.
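
The docstring touched above refers to the display width options; a small, hedged illustration of display.max_columns truncating a wide frame's repr (exact output depends on the active display settings):

    import numpy as np
    import pandas as pd

    wide = pd.DataFrame(np.arange(40).reshape(2, 20))

    # Once the frame has more columns than display.max_columns, the repr
    # collapses the middle columns into "..." instead of overflowing.
    with pd.option_context("display.max_columns", 5):
        print(wide)
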
2 changes: 1 addition & 1 deletion pandas/core/groupby/groupby.py
@@ -72,7 +72,7 @@ class providing the base-class of operations.

_apply_docs = dict(
template="""
-Apply function `func` group-wise and combine the results together.
+Apply function `func` group-wise and combine the results together.

The function passed to `apply` must take a {input} as its first
argument and return a DataFrame, Series or scalar. `apply` will
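
A brief example of the apply contract described in this docstring; the grouping column and function here are arbitrary choices for illustration:

    import pandas as pd

    df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})

    # `func` receives each group as a DataFrame and may return a scalar,
    # a Series or a DataFrame; the pieces are combined into one result.
    result = df.groupby("key").apply(lambda g: g["val"].sum())
    print(result)  # key "a" -> 3, key "b" -> 3
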
2 changes: 1 addition & 1 deletion pandas/core/groupby/ops.py
@@ -350,7 +350,7 @@ def get_group_levels(self):

def _is_builtin_func(self, arg):
"""
-if we define an builtin function for this argument, return it,
+if we define a builtin function for this argument, return it,
otherwise return the arg
"""
return SelectionMixin._builtin_table.get(arg, arg)
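
The lookup above goes through pandas-internal machinery; the standalone sketch below only mirrors the pattern, and its table contents are assumptions rather than the actual SelectionMixin._builtin_table:

    import builtins

    import numpy as np

    # Hypothetical stand-in table: map Python builtins to vectorised
    # equivalents, falling back to the argument itself when there is no entry.
    _builtin_table = {builtins.sum: np.sum, builtins.min: np.min, builtins.max: np.max}

    def is_builtin_func(arg):
        return _builtin_table.get(arg, arg)

    print(is_builtin_func(sum) is np.sum)  # True: the builtin is swapped out
    print(is_builtin_func(np.median))      # unchanged: no table entry
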
7 changes: 4 additions & 3 deletions pandas/core/indexes/base.py
@@ -1,7 +1,7 @@
from datetime import datetime
import operator
from textwrap import dedent
-from typing import Any, Dict, FrozenSet, Hashable, Optional, Union
+from typing import Any, FrozenSet, Hashable, Optional, Union
import warnings

import numpy as np
@@ -75,6 +75,7 @@
from pandas.core.strings import StringMethods

from pandas.io.formats.printing import (
+    PrettyDict,
    default_pprint,
    format_object_attrs,
    format_object_summary,
@@ -4765,7 +4766,7 @@ def _maybe_promote(self, other):
return self.astype("object"), other.astype("object")
return self, other

-def groupby(self, values) -> Dict[Hashable, np.ndarray]:
+def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]:
"""
Group the index labels by a given array of values.

@@ -4790,7 +4791,7 @@ def groupby(self, values) -> Dict[Hashable, np.ndarray]:
# map to the label
result = {k: self.take(v) for k, v in result.items()}

-return result
+return PrettyDict(result)

def map(self, mapper, na_action=None):
"""
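
A hedged example of Index.groupby, whose return value now prints via PrettyDict while still behaving like a plain dict; the repr in the comment is indicative:

    import numpy as np
    import pandas as pd

    idx = pd.Index(["x", "y", "z", "w"])
    values = np.array([1, 1, 2, 2])

    # Each distinct grouping value maps to the Index of labels in that group.
    grouped = idx.groupby(values)
    print(grouped[1])  # e.g. Index(['x', 'y'], dtype='object')
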
11 changes: 11 additions & 0 deletions pandas/io/formats/printing.py
@@ -6,12 +6,14 @@
from typing import (
    Any,
    Callable,
+    Dict,
    Iterable,
    List,
    Mapping,
    Optional,
    Sequence,
    Tuple,
+    TypeVar,
    Union,
)

@@ -20,6 +22,8 @@
from pandas.core.dtypes.inference import is_sequence

EscapeChars = Union[Mapping[str, str], Iterable[str]]
+_KT = TypeVar("_KT")
+_VT = TypeVar("_VT")


def adjoin(space: int, *lists: List[str], **kwargs) -> str:
@@ -528,3 +532,10 @@ def format_object_attrs(
    if len(obj) > max_seq_items:
        attrs.append(("length", len(obj)))
    return attrs


+class PrettyDict(Dict[_KT, _VT]):
+    """Dict extension to support abbreviated __repr__"""
+
+    def __repr__(self) -> str:
+        return pprint_thing(self)
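
A short usage sketch of the new helper, assuming PrettyDict is importable from pandas.io.formats.printing once this change is in; pprint_thing abbreviates mappings according to display.max_seq_items:

    import pandas as pd
    from pandas.io.formats.printing import PrettyDict

    d = PrettyDict({i: [i] for i in range(100)})

    # Behaves like a regular dict; only the repr is abbreviated once the
    # number of items exceeds display.max_seq_items.
    with pd.option_context("display.max_seq_items", 3):
        print(d)  # e.g. {0: [0], 1: [1], 2: [2], ...}
    assert d[99] == [99]
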
20 changes: 20 additions & 0 deletions pandas/tests/groupby/test_groupby.py
@@ -2037,3 +2037,23 @@ def test_groupby_list_level():
    expected = pd.DataFrame(np.arange(0, 9).reshape(3, 3))
    result = expected.groupby(level=[0]).mean()
    tm.assert_frame_equal(result, expected)


+@pytest.mark.parametrize(
+    "max_seq_items, expected",
+    [
+        (5, "{0: [0], 1: [1], 2: [2], 3: [3], 4: [4]}"),
+        (4, "{0: [0], 1: [1], 2: [2], 3: [3], ...}"),
+    ],
+)
+def test_groups_repr_truncates(max_seq_items, expected):
+    # GH 1135
+    df = pd.DataFrame(np.random.randn(5, 1))
+    df["a"] = df.index
+
+    with pd.option_context("display.max_seq_items", max_seq_items):
+        result = df.groupby("a").groups.__repr__()
+        assert result == expected
+
+        result = df.groupby(np.array(df.a)).groups.__repr__()
+        assert result == expected