diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index f0a359a75f8fc..47b61ac39ee7f 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -212,8 +212,9 @@ Other API Changes - :class:`Timestamp` and :class:`Timedelta` scalars now implement the :meth:`to_numpy` method as aliases to :meth:`Timestamp.to_datetime64` and :meth:`Timedelta.to_timedelta64`, respectively. (:issue:`24653`) - :meth:`Timestamp.strptime` will now rise a ``NotImplementedError`` (:issue:`25016`) - Comparing :class:`Timestamp` with unsupported objects now returns :py:obj:`NotImplemented` instead of raising ``TypeError``. This implies that unsupported rich comparisons are delegated to the other object, and are now consistent with Python 3 behavior for ``datetime`` objects (:issue:`24011`) -- Bug in :meth:`DatetimeIndex.snap` which didn't preserving the ``name`` of the input :class:`Index` (:issue:`25575`) +- Bug in :meth:`DatetimeIndex.snap` which didn't preserve the ``name`` of the input :class:`Index` (:issue:`25575`) - The ``arg`` argument in :meth:`pandas.core.groupby.DataFrameGroupBy.agg` has been renamed to ``func`` (:issue:`26089`) +- :meth:`Index.groupby` and dependent methods (notably :attr:`GroupBy.groups`) now return an object with an abbreviated repr (:issue:`1135`) .. _whatsnew_0250.deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3248b708cd7aa..5ba24f44b6b53 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -554,7 +554,7 @@ def _repr_fits_horizontal_(self, ignore_width=False): Check if full repr fits in horizontal boundaries imposed by the display options width and max_columns. - In case off non-interactive session, no boundaries apply. + In case of non-interactive session, no boundaries apply. `ignore_width` is here so ipnb+HTML output can behave the way users expect. display.max_columns remains in effect. 
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index bd8a8852964e3..163c96f93bc36 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -56,7 +56,7 @@ class providing the base-class of operations. _apply_docs = dict( template=""" - Apply function `func` group-wise and combine the results together. + Apply function `func` group-wise and combine the results together. The function passed to `apply` must take a {input} as its first argument and return a DataFrame, Series or scalar. `apply` will diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 8145e5000c056..343530bc475c5 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -234,7 +234,6 @@ class Grouping: def __init__(self, index, grouper=None, obj=None, name=None, level=None, sort=True, observed=False, in_axis=False): - self.name = name self.level = level self.grouper = _convert_grouper(index, grouper) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index e6b7577d97bad..76cb72379604f 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -256,6 +256,7 @@ def size(self): @cache_readonly def groups(self): """ dict {group name -> group labels} """ + if len(self.groupings) == 1: return self.groupings[0].groups else: @@ -382,7 +383,7 @@ def get_group_levels(self): def _is_builtin_func(self, arg): """ - if we define an builtin function for this argument, return it, + if we define a builtin function for this argument, return it, otherwise return the arg """ return SelectionMixin._builtin_table.get(arg, arg) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6bb8f299e811f..e873808c2f8f9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6,6 +6,8 @@ import numpy as np +from pandas._config.config import get_option + from pandas._libs import ( algos as libalgos, index as libindex, join as libjoin, lib) from 
pandas._libs.lib import is_datetime_array @@ -4484,7 +4486,7 @@ def groupby(self, values): # map to the label result = {k: self.take(v) for k, v in result.items()} - return result + return IndexGroupbyGroups(result) def map(self, mapper, na_action=None): """ @@ -5274,6 +5276,14 @@ def _add_logical_methods_disabled(cls): Index._add_comparison_methods() +class IndexGroupbyGroups(dict): + """Dict extension to support abbreviated __repr__""" + from pandas.io.formats.printing import pprint_thing + + def __repr__(self): + return pprint_thing(self, max_seq_items=get_option('display.max_rows')) + + def ensure_index_from_sequences(sequences, names=None): """ Construct an index from sequences of data. diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index bee66fcbfaa82..7acc6a9adf7cf 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -92,7 +92,7 @@ def _join_unicode(lines, sep=''): def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): """ internal. pprinter for iterables. you should probably use pprint_thing() - rather then calling this directly. + rather than calling this directly. bounds length of printed sequence, depending on options """ @@ -124,8 +124,9 @@ def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): """ internal. pprinter for iterables. you should probably use pprint_thing() - rather then calling this directly. + rather than calling this directly. 
""" + fmt = "{{{things}}}" pairs = [] diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 9739fe580f4cf..b56e0864fdf70 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1761,6 +1761,22 @@ def test_period(self): assert str(df) == exp +class TestDataFrameGroupByFormatting(object): + def test_groups_repr_truncates(self): + df = pd.DataFrame({ + 'a': [1, 1, 1, 2, 2, 3], + 'b': [1, 2, 3, 4, 5, 6] + }) + + with option_context('display.max_rows', 2): + x = df.groupby('a').groups + assert x.__repr__().endswith('...}') + + with option_context('display.max_rows', 5): + x = df.groupby('a').groups + assert not x.__repr__().endswith('...}') + + def gen_series_formatting(): s1 = pd.Series(['a'] * 100) s2 = pd.Series(['ab'] * 100)