Skip to content

Commit 7064f09

Browse files
author
Marco Gorelli
committed
Fix conflicts, move classes according to previous review
1 parent 2985c31 commit 7064f09

File tree

7 files changed

+23
-51
lines changed

7 files changed

+23
-51
lines changed

doc/source/whatsnew/v1.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ Other API changes
5454

5555
- :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last``
5656
will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`)
57-
-
57+
- :meth:`GroupBy.groups` now returns an abbreviated representation when called on large DataFrames (:issue:`1135`)
5858

5959
Backwards incompatible API changes
6060
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

pandas/core/groupby/groupby.py

-23
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ class providing the base-class of operations.
5656
from pandas.core.arrays import Categorical, DatetimeArray, try_cast_to_ea
5757
from pandas.core.base import DataError, PandasObject, SelectionMixin
5858
import pandas.core.common as com
59-
from pandas.core.config import option_context
6059
from pandas.core.frame import DataFrame
6160
from pandas.core.generic import NDFrame
6261
from pandas.core.groupby import base, ops
@@ -2568,25 +2567,3 @@ def get_groupby(
25682567
observed=observed,
25692568
mutated=mutated,
25702569
)
2571-
2572-
2573-
class DataFrameGroups(dict):
2574-
def __repr__(self):
2575-
from pandas.compat import u
2576-
2577-
nitems = get_option('display.max_rows') or len(self)
2578-
2579-
fmt = u("{{{things}}}")
2580-
pfmt = u("{key}: {val}")
2581-
2582-
pairs = []
2583-
for k, v in list(self.items()):
2584-
pairs.append(pfmt.format(key=k, val=v))
2585-
2586-
if nitems < len(self):
2587-
start_cnt, end_cnt = nitems - int(nitems / 2), int(nitems / 2)
2588-
return fmt.format(things=", ".join(pairs[:start_cnt]) +
2589-
", ... , " +
2590-
", ".join(pairs[-end_cnt:]))
2591-
else:
2592-
return fmt.format(things=", ".join(pairs))

pandas/core/groupby/ops.py

-1
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,6 @@ def size(self) -> Series:
246246
@cache_readonly
247247
def groups(self):
248248
""" dict {group name -> group labels} """
249-
250249
if len(self.groupings) == 1:
251250
return self.groupings[0].groups
252251
else:

pandas/core/indexes/base.py

+2-10
Original file line numberDiff line numberDiff line change
@@ -68,14 +68,14 @@
6868
from pandas.core.base import IndexOpsMixin, PandasObject
6969
import pandas.core.common as com
7070
from pandas.core.indexers import maybe_convert_indices
71-
from pandas.core.config import get_option
7271
from pandas.core.indexes.frozen import FrozenList
7372
import pandas.core.missing as missing
7473
from pandas.core.ops import get_op_result_name
7574
from pandas.core.ops.invalid import make_invalid_op
7675
from pandas.core.strings import StringMethods
7776

7877
from pandas.io.formats.printing import (
78+
PrettyDict,
7979
default_pprint,
8080
format_object_attrs,
8181
format_object_summary,
@@ -4791,7 +4791,7 @@ def groupby(self, values) -> Dict[Hashable, np.ndarray]:
47914791
# map to the label
47924792
result = {k: self.take(v) for k, v in result.items()}
47934793

4794-
return IndexGroupbyGroups(result)
4794+
return PrettyDict(result)
47954795

47964796
def map(self, mapper, na_action=None):
47974797
"""
@@ -5502,14 +5502,6 @@ def shape(self):
55025502
Index._add_comparison_methods()
55035503

55045504

5505-
class IndexGroupbyGroups(dict):
5506-
"""Dict extension to support abbreviated __repr__"""
5507-
from pandas.io.formats.printing import pprint_thing
5508-
5509-
def __repr__(self):
5510-
return pprint_thing(self, max_seq_items=get_option('display.max_rows'))
5511-
5512-
55135505
def ensure_index_from_sequences(sequences, names=None):
55145506
"""
55155507
Construct an index from sequences of data.

pandas/io/formats/printing.py

+7
Original file line numberDiff line numberDiff line change
@@ -528,3 +528,10 @@ def format_object_attrs(
528528
if len(obj) > max_seq_items:
529529
attrs.append(("length", len(obj)))
530530
return attrs
531+
532+
533+
class PrettyDict(dict):
534+
"""Dict extension to support abbreviated __repr__"""
535+
536+
def __repr__(self):
537+
return pprint_thing(self, max_seq_items=get_option("display.max_rows"))

pandas/tests/groupby/test_groupby.py

+13
Original file line numberDiff line numberDiff line change
@@ -2037,3 +2037,16 @@ def test_groupby_list_level():
20372037
expected = pd.DataFrame(np.arange(0, 9).reshape(3, 3))
20382038
result = expected.groupby(level=[0]).mean()
20392039
tm.assert_frame_equal(result, expected)
2040+
2041+
2042+
def test_groups_repr_truncates():
2043+
# GH 1135
2044+
df = pd.DataFrame({"a": [1, 1, 1, 2, 2, 3], "b": [1, 2, 3, 4, 5, 6]})
2045+
2046+
with pd.option_context("display.max_rows", 2):
2047+
x = df.groupby("a").groups
2048+
assert x.__repr__().endswith("...}")
2049+
2050+
with pd.option_context("display.max_rows", 5):
2051+
x = df.groupby(np.array(df.a)).groups
2052+
assert not x.__repr__().endswith("...}")

pandas/tests/io/formats/test_format.py

-16
Original file line numberDiff line numberDiff line change
@@ -2128,22 +2128,6 @@ def test_period(self):
21282128
assert str(df) == exp
21292129

21302130

2131-
class TestDataFrameGroupByFormatting(object):
2132-
def test_groups_repr_truncates(self):
2133-
df = pd.DataFrame({
2134-
'a': [1, 1, 1, 2, 2, 3],
2135-
'b': [1, 2, 3, 4, 5, 6]
2136-
})
2137-
2138-
with option_context('display.max_rows', 2):
2139-
x = df.groupby('a').groups
2140-
assert x.__repr__().endswith('...}')
2141-
2142-
with option_context('display.max_rows', 5):
2143-
x = df.groupby('a').groups
2144-
assert not x.__repr__().endswith('...}')
2145-
2146-
21472131
def gen_series_formatting():
21482132
s1 = pd.Series(["a"] * 100)
21492133
s2 = pd.Series(["ab"] * 100)

0 commit comments

Comments
 (0)