Skip to content

Commit 90de456

Browse files
committed
ENH: refactoring groupby plots, add DataFrame.boxplot function, close PR #287
1 parent 5d7f6b7 commit 90de456

File tree

3 files changed

+91
-14
lines changed

3 files changed

+91
-14
lines changed

RELEASE.rst

+5-3
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ pandas 0.5.1
3737
- Add `Series.isin` function which checks if each value is contained in a
3838
passed sequence (GH #289)
3939
- Add `float_format` option to `Series.to_string`
40-
- Add `skip_footer` (GH #291) and `converters` (GH #343 ) options to
40+
- Add `skip_footer` (GH #291) and `converters` (GH #343) options to
4141
`read_csv` and `read_table`
4242
- Add proper, tested weighted least squares to standard and panel OLS (GH
4343
#303)
@@ -48,7 +48,7 @@ pandas 0.5.1
4848
- Add `QuarterEnd` DateOffset (PR #321)
4949
- Add matrix multiplication function `dot` to DataFrame (GH #65)
5050
- Add `orient` option to `Panel.from_dict` to ease creation of mixed-type
51-
Panels (GH #359)
51+
Panels (GH #359, #301)
5252
- Add `DataFrame.from_dict` with similar `orient` option
5353
- Can now pass list of tuples or list of lists to `DataFrame.from_records`
5454
for fast conversion to DataFrame (GH #357)
@@ -64,7 +64,6 @@ pandas 0.5.1
6464
6565
**Improvements to existing features**
6666

67-
- Sped up `DataFrame.apply` performance in most cases
6867
- Raise more helpful exception if date parsing fails in DateRange (GH #298)
6968
- Vastly improved performance of GroupBy on axes with a MultiIndex (GH #299)
7069
- Print level names in hierarchical index in Series repr (GH #305)
@@ -119,6 +118,9 @@ pandas 0.5.1
119118
- Change `is_monotonic` on MultiIndex so it properly compares the tuples
120119
- Fix MultiIndex outer join logic (GH #351)
121120
- Set index name attribute with single-key groupby (GH #358)
121+
- Bug fix in reflexive binary addition in Series and DataFrame for
122+
non-commutative operations (like string concatenation) (GH #353)
123+
- setupegg.py will invoke Cython (GH #192)
122124

123125
Thanks
124126
------

pandas/core/frame.py

+25
Original file line numberDiff line numberDiff line change
@@ -3007,6 +3007,31 @@ def clip_lower(self, threshold):
30073007
#----------------------------------------------------------------------
30083008
# Plotting
30093009

3010+
def boxplot(self, column=None, by=None, ax=None, fontsize=None,
            rot=0, grid=True, **kwds):
    """
    Make a box plot from DataFrame column/columns optionally grouped
    (stratified) by one or more columns

    Thin wrapper around ``pandas.tools.plotting.boxplot`` that passes
    this DataFrame as the data and then redraws if matplotlib is in
    interactive mode.

    Parameters
    ----------
    column : column name or list of names, or vector
        Can be any valid input to groupby
    by : string or sequence
        Column in the DataFrame to group by
    ax : matplotlib axes, optional
        Forwarded to ``pandas.tools.plotting.boxplot``
    fontsize : int or string
        Forwarded to ``pandas.tools.plotting.boxplot``
    rot : int, default 0
        Forwarded to ``pandas.tools.plotting.boxplot``
    grid : boolean, default True
        Forwarded to ``pandas.tools.plotting.boxplot``
    kwds : other keyword arguments
        Accepted for signature compatibility; not forwarded

    Returns
    -------
    ax : matplotlib.axes.AxesSubplot
        Whatever ``pandas.tools.plotting.boxplot`` returns
    """
    # Imported lazily so that matplotlib is only required when plotting.
    import pandas.tools.plotting as plots
    import matplotlib.pyplot as plt
    ax = plots.boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize,
                       grid=grid, rot=rot)
    plt.draw_if_interactive()
    # The docstring has always promised the axes; previously the result
    # was computed and then dropped, so callers received None.
    return ax
3034+
30103035
def plot(self, subplots=False, sharex=True, sharey=False, use_index=True,
30113036
figsize=None, grid=True, legend=True, rot=30, ax=None,
30123037
kind='line', **kwds):

pandas/tools/plotting.py

+61-11
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ def hist(data, column, by=None, ax=None, fontsize=None):
1212
ax.set_xticklabels(keys, rotation=0, fontsize=fontsize)
1313
return ax
1414

15-
def boxplot(data, column, by=None, ax=None, fontsize=None, rot=0):
15+
def boxplot(data, column=None, by=None, ax=None, fontsize=None,
16+
rot=0, grid=True):
1617
"""
1718
Make a box plot from DataFrame column optionally grouped by some columns or
1819
other inputs
@@ -30,19 +31,38 @@ def boxplot(data, column, by=None, ax=None, fontsize=None, rot=0):
3031
-------
3132
ax : matplotlib.axes.AxesSubplot
3233
"""
33-
keys, values = zip(*data.groupby(by)[column])
34+
def plot_group(grouped, ax):
35+
keys, values = zip(*grouped)
36+
keys = [_stringify(x) for x in keys]
37+
ax.boxplot(values)
38+
ax.set_xticklabels(keys, rotation=rot, fontsize=fontsize)
3439

35-
if ax is None:
36-
ax = plt.gca()
37-
ax.boxplot(values)
38-
ax.set_xticklabels(keys, rotation=rot, fontsize=fontsize)
40+
if by is not None:
41+
if not isinstance(by, (list, tuple)):
42+
by = [by]
3943

40-
ax.set_xlabel(str(by))
41-
ax.set_ylabel(str(column))
44+
columns = None if column is None else [column]
45+
fig, axes = _grouped_plot_by_column(plot_group, data, columns=columns,
46+
by=by)
47+
ax = axes
48+
else:
49+
if ax is None:
50+
ax = plt.gca()
4251

43-
plt.subplots_adjust(bottom=0.15)
52+
data = data._get_numeric_data()
53+
keys = [_stringify(x) for x in data.columns]
54+
ax.boxplot(list(data.values.T))
55+
ax.set_xticklabels(keys, rotation=rot, fontsize=fontsize)
56+
57+
plt.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.1)
4458
return ax
4559

60+
def _stringify(x):
61+
if isinstance(x, tuple):
62+
return '|'.join(str(y) for y in x)
63+
else:
64+
return str(x)
65+
4666
def scatter_plot(data, x, y, by=None, ax=None):
4767
"""
4868
@@ -66,7 +86,7 @@ def plot_group(group, ax):
6686

6787
return fig
6888

69-
def _grouped_plot(plotf, data, by=None):
89+
def _grouped_plot(plotf, data, by=None, numeric_only=True):
7090
grouped = data.groupby(by)
7191
ngroups = len(grouped)
7292

@@ -80,10 +100,40 @@ def _grouped_plot(plotf, data, by=None):
80100

81101
for i, (key, group) in enumerate(grouped):
82102
ax = ravel_axes[i]
103+
if numeric_only:
104+
group = group._get_numeric_data()
83105
plotf(group, ax)
84106
ax.set_title(str(key))
85107

86-
return fig
108+
return fig, axes
109+
110+
def _grouped_plot_by_column(plotf, data, columns=None, by=None,
                            numeric_only=True):
    """
    Draw one subplot per column, each showing that column split by group.

    Parameters
    ----------
    plotf : callable
        Called as ``plotf(grouped[col], ax)`` for every column
    data : DataFrame
    columns : sequence of column names, optional
        Columns to plot; defaults to ``data.columns - by``
    by : list
        Grouping key(s) passed to ``data.groupby``
    numeric_only : boolean, default True
        Currently unused by this function

    Returns
    -------
    fig, axes
        The figure and the axes object(s) exactly as returned by
        ``plt.subplots``
    """
    grouped = data.groupby(by)
    if columns is None:
        # NOTE(review): relies on ``Index - list`` acting as a set
        # difference (every column that is not a grouping key) -- confirm
        # against the pandas version in use.
        columns = data.columns - by
    ngroups = len(columns)

    nrows, ncols = _get_layout(ngroups)
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols,
                             sharex=True, sharey=True)

    if isinstance(axes, plt.Axes):
        ravel_axes = [axes]
    else:
        # plt.subplots squeezes its result: a single row or column of
        # subplots comes back as a 1-D array, so iterating "rows" would
        # yield bare Axes objects. ``.flat`` handles 1-D and 2-D alike.
        ravel_axes = list(axes.flat)

    for i, col in enumerate(columns):
        ax = ravel_axes[i]
        gp_col = grouped[col]
        plotf(gp_col, ax)
        # Column labels need not be strings; match _grouped_plot's str(key).
        ax.set_title(str(col))

    # Wrap in a 1-tuple so a tuple-valued ``by`` is formatted as one value
    # instead of being unpacked as multiple format arguments.
    fig.suptitle('Boxplot grouped by %s' % (by,))

    return fig, axes
87137

88138
def _get_layout(nplots):
89139
if nplots == 1:

0 commit comments

Comments
 (0)