Skip to content

Commit 7a886b5

Browse files
committed
Merge pull request #5105 from cpcloud/enable-groupby-plot-and-completion
BUG: allow plot, boxplot, hist and completion on GroupBy objects
2 parents a931f60 + bc2fa2f commit 7a886b5

File tree

5 files changed

+215
-39
lines changed

5 files changed

+215
-39
lines changed

doc/source/10min.rst

+37
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,43 @@ Having specific :ref:`dtypes <basics.dtypes>`
7474
7575
df2.dtypes
7676
77+
If you're using IPython, tab completion for column names (as well as public
78+
attributes) is automatically enabled. Here's a subset of the attributes that
79+
will be completed:
80+
81+
.. ipython::
82+
83+
@verbatim
84+
In [1]: df2.<TAB>
85+
86+
df2.A df2.boxplot
87+
df2.abs df2.C
88+
df2.add df2.clip
89+
df2.add_prefix df2.clip_lower
90+
df2.add_suffix df2.clip_upper
91+
df2.align df2.columns
92+
df2.all df2.combine
93+
df2.any df2.combineAdd
94+
df2.append df2.combine_first
95+
df2.apply df2.combineMult
96+
df2.applymap df2.compound
97+
df2.as_blocks df2.consolidate
98+
df2.asfreq df2.convert_objects
99+
df2.as_matrix df2.copy
100+
df2.astype df2.corr
101+
df2.at df2.corrwith
102+
df2.at_time df2.count
103+
df2.axes df2.cov
104+
df2.B df2.cummax
105+
df2.between_time df2.cummin
106+
df2.bfill df2.cumprod
107+
df2.blocks df2.cumsum
108+
df2.bool df2.D
109+
110+
As you can see, the columns ``A``, ``B``, ``C``, and ``D`` are automatically
111+
tab completed. ``E`` is there as well; the rest of the attributes have been
112+
truncated for brevity.
113+
77114
Viewing Data
78115
------------
79116

doc/source/groupby.rst

+39
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,45 @@ however pass ``sort=False`` for potential speedups:
188188
df2.groupby(['X'], sort=True).sum()
189189
df2.groupby(['X'], sort=False).sum()
190190
191+
.. _groupby.tabcompletion:
192+
193+
``GroupBy`` will tab complete column names (and other attributes):
194+
195+
.. ipython:: python
196+
:suppress:
197+
198+
n = 10
199+
weight = np.random.normal(166, 20, size=n)
200+
height = np.random.normal(60, 10, size=n)
201+
time = date_range('1/1/2000', periods=n)
202+
gender = tm.choice(['male', 'female'], size=n)
203+
df = DataFrame({'height': height, 'weight': weight,
204+
'gender': gender}, index=time)
205+
206+
.. ipython:: python
207+
208+
df
209+
gb = df.groupby('gender')
210+
211+
212+
.. ipython::
213+
214+
@verbatim
215+
In [1]: gb.<TAB>
216+
gb.agg gb.boxplot gb.cummin gb.describe gb.filter gb.get_group gb.height gb.last gb.median gb.ngroups gb.plot gb.rank gb.std gb.transform
217+
gb.aggregate gb.count gb.cumprod gb.dtype gb.first gb.groups gb.hist gb.max gb.min gb.nth gb.prod gb.resample gb.sum gb.var
218+
gb.apply gb.cummax gb.cumsum gb.fillna gb.gender gb.head gb.indices gb.mean gb.name gb.ohlc gb.quantile gb.size gb.tail gb.weight
219+
220+
221+
.. ipython:: python
222+
:suppress:
223+
224+
df = DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
225+
'foo', 'bar', 'foo', 'foo'],
226+
'B' : ['one', 'one', 'two', 'three',
227+
'two', 'two', 'one', 'three'],
228+
'C' : randn(8), 'D' : randn(8)})
229+
191230
.. _groupby.multiindex:
192231

193232
GroupBy with MultiIndex

doc/source/release.rst

+8-1
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,8 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
380380
function signature.
381381
- :func:`~pandas.read_html` now uses ``TextParser`` to parse HTML data from
382382
bs4/lxml (:issue:`4770`).
383+
- Removed the ``keep_internal`` keyword parameter in
384+
``pandas/core/groupby.py`` because it wasn't being used (:issue:`5102`).
383385

384386
.. _release.bug_fixes-0.13.0:
385387

@@ -544,7 +546,7 @@ Bug Fixes
544546
- Bug in setting with ``ix/loc`` and a mixed int/string index (:issue:`4544`)
545547
- Make sure series-series boolean comparisons are label based (:issue:`4947`)
546548
- Bug in multi-level indexing with a Timestamp partial indexer (:issue:`4294`)
547-
- Tests/fix for multi-index construction of an all-nan frame (:isue:`4078`)
549+
- Tests/fix for multi-index construction of an all-nan frame (:issue:`4078`)
548550
- Fixed a bug where :func:`~pandas.read_html` wasn't correctly inferring
549551
values of tables with commas (:issue:`5029`)
550552
- Fixed a bug where :func:`~pandas.read_html` wasn't providing a stable
@@ -555,6 +557,11 @@ Bug Fixes
555557
type of headers (:issue:`5048`).
556558
- Fixed a bug where ``DatetimeIndex`` joins with ``PeriodIndex`` caused a
557559
stack overflow (:issue:`3899`).
560+
- Fixed a bug where ``groupby`` objects didn't allow plots (:issue:`5102`).
561+
- Fixed a bug where ``groupby`` objects weren't tab-completing column names
562+
(:issue:`5102`).
563+
- Fixed a bug where ``groupby.plot()`` and friends were duplicating figures
564+
multiple times (:issue:`5102`).
558565

559566

560567
pandas 0.12.0

pandas/core/groupby.py

+46-29
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import types
2+
from functools import wraps
23
import numpy as np
34

45
from pandas.compat import(
@@ -45,14 +46,19 @@
4546
"""
4647

4748

49+
# special case to prevent duplicate plots when catching exceptions when
50+
# forwarding methods from NDFrames
51+
_plotting_methods = frozenset(['plot', 'boxplot', 'hist'])
52+
4853
_apply_whitelist = frozenset(['last', 'first',
4954
'mean', 'sum', 'min', 'max',
5055
'head', 'tail',
5156
'cumsum', 'cumprod', 'cummin', 'cummax',
5257
'resample',
5358
'describe',
5459
'rank', 'quantile', 'count',
55-
'fillna', 'dtype'])
60+
'fillna', 'dtype']) | _plotting_methods
61+
5662

5763

5864
class GroupByError(Exception):
@@ -180,7 +186,6 @@ class GroupBy(PandasObject):
180186
len(grouped) : int
181187
Number of groups
182188
"""
183-
184189
def __init__(self, obj, keys=None, axis=0, level=None,
185190
grouper=None, exclusions=None, selection=None, as_index=True,
186191
sort=True, group_keys=True, squeeze=False):
@@ -244,6 +249,9 @@ def _selection_list(self):
244249
return [self._selection]
245250
return self._selection
246251

252+
def _local_dir(self):
253+
return sorted(set(self.obj._local_dir() + list(_apply_whitelist)))
254+
247255
def __getattr__(self, attr):
248256
if attr in self.obj:
249257
return self[attr]
@@ -285,6 +293,15 @@ def curried_with_axis(x):
285293
def curried(x):
286294
return f(x, *args, **kwargs)
287295

296+
# preserve the name so we can detect it when calling plot methods,
297+
# to avoid duplicates
298+
curried.__name__ = curried_with_axis.__name__ = name
299+
300+
# special case otherwise extra plots are created when catching the
301+
# exception below
302+
if name in _plotting_methods:
303+
return self.apply(curried)
304+
288305
try:
289306
return self.apply(curried_with_axis)
290307
except Exception:
@@ -348,7 +365,11 @@ def apply(self, func, *args, **kwargs):
348365
applied : type depending on grouped object and function
349366
"""
350367
func = _intercept_function(func)
351-
f = lambda g: func(g, *args, **kwargs)
368+
369+
@wraps(func)
370+
def f(g):
371+
return func(g, *args, **kwargs)
372+
352373
return self._python_apply_general(f)
353374

354375
def _python_apply_general(self, f):
@@ -598,7 +619,7 @@ def __iter__(self):
598619
def nkeys(self):
599620
return len(self.groupings)
600621

601-
def get_iterator(self, data, axis=0, keep_internal=True):
622+
def get_iterator(self, data, axis=0):
602623
"""
603624
Groupby iterator
604625
@@ -607,16 +628,14 @@ def get_iterator(self, data, axis=0, keep_internal=True):
607628
Generator yielding sequence of (name, subsetted object)
608629
for each group
609630
"""
610-
splitter = self._get_splitter(data, axis=axis,
611-
keep_internal=keep_internal)
631+
splitter = self._get_splitter(data, axis=axis)
612632
keys = self._get_group_keys()
613633
for key, (i, group) in zip(keys, splitter):
614634
yield key, group
615635

616-
def _get_splitter(self, data, axis=0, keep_internal=True):
636+
def _get_splitter(self, data, axis=0):
617637
comp_ids, _, ngroups = self.group_info
618-
return get_splitter(data, comp_ids, ngroups, axis=axis,
619-
keep_internal=keep_internal)
638+
return get_splitter(data, comp_ids, ngroups, axis=axis)
620639

621640
def _get_group_keys(self):
622641
if len(self.groupings) == 1:
@@ -627,19 +646,19 @@ def _get_group_keys(self):
627646
mapper = _KeyMapper(comp_ids, ngroups, self.labels, self.levels)
628647
return [mapper.get_key(i) for i in range(ngroups)]
629648

630-
def apply(self, f, data, axis=0, keep_internal=False):
649+
def apply(self, f, data, axis=0):
631650
mutated = False
632-
splitter = self._get_splitter(data, axis=axis,
633-
keep_internal=keep_internal)
651+
splitter = self._get_splitter(data, axis=axis)
634652
group_keys = self._get_group_keys()
635653

636654
# oh boy
637-
if hasattr(splitter, 'fast_apply') and axis == 0:
655+
if (f.__name__ not in _plotting_methods and
656+
hasattr(splitter, 'fast_apply') and axis == 0):
638657
try:
639658
values, mutated = splitter.fast_apply(f, group_keys)
640659
return group_keys, values, mutated
641-
except (Exception) as detail:
642-
# we detect a mutatation of some kind
660+
except Exception:
661+
# we detect a mutation of some kind
643662
# so take slow path
644663
pass
645664

@@ -1043,7 +1062,7 @@ def get_iterator(self, data, axis=0):
10431062
inds = lrange(start, n)
10441063
yield self.binlabels[-1], data.take(inds, axis=axis)
10451064

1046-
def apply(self, f, data, axis=0, keep_internal=False):
1065+
def apply(self, f, data, axis=0):
10471066
result_keys = []
10481067
result_values = []
10491068
mutated = False
@@ -1617,6 +1636,7 @@ def filter(self, func, dropna=True, *args, **kwargs):
16171636
else:
16181637
return filtered.reindex(self.obj.index) # Fill with NaNs.
16191638

1639+
16201640
class NDFrameGroupBy(GroupBy):
16211641

16221642
def _iterate_slices(self):
@@ -1939,14 +1959,14 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
19391959
index = key_index
19401960
else:
19411961
stacked_values = np.vstack([np.asarray(x)
1942-
for x in values]).T
1962+
for x in values]).T
19431963

19441964
index = values[0].index
19451965
columns = key_index
19461966

1947-
except ValueError:
1948-
#GH1738,, values is list of arrays of unequal lengths
1949-
# fall through to the outer else caluse
1967+
except (ValueError, AttributeError):
1968+
# GH1738: values is list of arrays of unequal lengths; fall
1969+
# through to the outer else clause
19501970
return Series(values, index=key_index)
19511971

19521972
return DataFrame(stacked_values, index=index,
@@ -2268,6 +2288,7 @@ def ohlc(self):
22682288
"""
22692289
return self._apply_to_column_groupbys(lambda x: x._cython_agg_general('ohlc'))
22702290

2291+
22712292
from pandas.tools.plotting import boxplot_frame_groupby
22722293
DataFrameGroupBy.boxplot = boxplot_frame_groupby
22732294

@@ -2364,7 +2385,7 @@ class NDArrayGroupBy(GroupBy):
23642385

23652386
class DataSplitter(object):
23662387

2367-
def __init__(self, data, labels, ngroups, axis=0, keep_internal=False):
2388+
def __init__(self, data, labels, ngroups, axis=0):
23682389
self.data = data
23692390
self.labels = com._ensure_int64(labels)
23702391
self.ngroups = ngroups
@@ -2419,10 +2440,8 @@ def _chop(self, sdata, slice_obj):
24192440

24202441

24212442
class FrameSplitter(DataSplitter):
2422-
2423-
def __init__(self, data, labels, ngroups, axis=0, keep_internal=False):
2424-
DataSplitter.__init__(self, data, labels, ngroups, axis=axis,
2425-
keep_internal=keep_internal)
2443+
def __init__(self, data, labels, ngroups, axis=0):
2444+
super(FrameSplitter, self).__init__(data, labels, ngroups, axis=axis)
24262445

24272446
def fast_apply(self, f, names):
24282447
# must return keys::list, values::list, mutated::bool
@@ -2445,10 +2464,8 @@ def _chop(self, sdata, slice_obj):
24452464

24462465

24472466
class NDFrameSplitter(DataSplitter):
2448-
2449-
def __init__(self, data, labels, ngroups, axis=0, keep_internal=False):
2450-
DataSplitter.__init__(self, data, labels, ngroups, axis=axis,
2451-
keep_internal=keep_internal)
2467+
def __init__(self, data, labels, ngroups, axis=0):
2468+
super(NDFrameSplitter, self).__init__(data, labels, ngroups, axis=axis)
24522469

24532470
self.factory = data._constructor
24542471

0 commit comments

Comments
 (0)