Skip to content

Commit 08f8b57

Browse files
author
Chang She
committed
BUG: intercept NumPy median in groupby #1989. Also some doc fixes
1 parent 9796924 commit 08f8b57

File tree

6 files changed

+38
-30
lines changed

6 files changed

+38
-30
lines changed

pandas/core/frame.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -3947,9 +3947,9 @@ def apply(self, func, axis=0, broadcast=False, raw=False,
39473947
>>> df.apply(numpy.sum, axis=0) # equiv to df.sum(0)
39483948
>>> df.apply(numpy.sum, axis=1) # equiv to df.sum(1)
39493949
3950-
Notes
3951-
-----
3952-
To apply a function elementwise, use applymap
3950+
See also
3951+
--------
3952+
DataFrame.applymap: For elementwise operations
39533953
39543954
Returns
39553955
-------

pandas/core/groupby.py

+25-19
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,31 @@
1111
from pandas.core.panel import Panel
1212
from pandas.util.decorators import cache_readonly, Appender
1313
from pandas.util.compat import OrderedDict
14+
from pandas.util.decorators import Appender
1415
import pandas.core.algorithms as algos
1516
import pandas.core.common as com
1617
import pandas.lib as lib
1718

19+
_agg_doc = """Aggregate using input function or dict of {column -> function}
20+
21+
Parameters
22+
----------
23+
arg : function or dict
24+
Function to use for aggregating groups. If a function, must either
25+
work when passed a DataFrame or when passed to DataFrame.apply. If
26+
passed a dict, the keys must be DataFrame column names
27+
28+
Notes
29+
-----
30+
NumPy functions mean/median/prod/sum/std/var are special-cased so the
31+
default behavior is applying the function along axis=0
32+
(e.g., np.mean(arr_2d, axis=0)) as opposed to
33+
mimicking the default NumPy behavior (e.g., np.mean(arr_2d)).
34+
35+
Returns
36+
-------
37+
aggregated : DataFrame
38+
"""
1839

1940
class GroupByError(Exception):
2041
pass
@@ -298,10 +319,8 @@ def apply(self, func, *args, **kwargs):
298319
def aggregate(self, func, *args, **kwargs):
299320
raise NotImplementedError
300321

322+
@Appender(_agg_doc)
301323
def agg(self, func, *args, **kwargs):
302-
"""
303-
See docstring for aggregate
304-
"""
305324
return self.aggregate(func, *args, **kwargs)
306325

307326
def _iterate_slices(self):
@@ -1508,21 +1527,8 @@ def _obj_with_exclusions(self):
15081527
else:
15091528
return self.obj
15101529

1530+
@Appender(_agg_doc)
15111531
def aggregate(self, arg, *args, **kwargs):
1512-
"""
1513-
Aggregate using input function or dict of {column -> function}
1514-
1515-
Parameters
1516-
----------
1517-
arg : function or dict
1518-
Function to use for aggregating groups. If a function, must either
1519-
work when passed a DataFrame or when passed to DataFrame.apply. If
1520-
pass a dict, the keys must be DataFrame column names
1521-
1522-
Returns
1523-
-------
1524-
aggregated : DataFrame
1525-
"""
15261532
if isinstance(arg, basestring):
15271533
return getattr(self, arg)(*args, **kwargs)
15281534

@@ -2238,10 +2244,10 @@ def _reorder_by_uniques(uniques, labels):
22382244
np.mean: 'mean',
22392245
np.prod: 'prod',
22402246
np.std: 'std',
2241-
np.var: 'var'
2247+
np.var: 'var',
2248+
np.median: 'median'
22422249
}
22432250

2244-
22452251
def _is_numeric_dtype(dt):
22462252
typ = dt.type
22472253
return (issubclass(typ, (np.number, np.bool_))

pandas/core/series.py

-5
Original file line numberDiff line numberDiff line change
@@ -2214,11 +2214,6 @@ def apply(self, func, convert_dtype=True, args=(), **kwds):
22142214
--------
22152215
Series.map: For element-wise operations
22162216
2217-
Notes
2218-
-----
2219-
func is applied to the entire Series at once first. If an exception
2220-
is raised, then apply to each value.
2221-
22222217
Returns
22232218
-------
22242219
y : Series or DataFrame if func returns a Series

pandas/tests/test_graphics.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,8 @@ def test_plot_xy(self):
238238
ax = df.plot(x=1, y=2, title='Test', figsize=(16, 8))
239239

240240
self.assert_(ax.title.get_text() == 'Test')
241-
self.assert_((np.round(ax.figure.get_size_inches()) == np.array((16., 8.))).all())
241+
self.assert_((np.round(ax.figure.get_size_inches())
242+
== np.array((16., 8.))).all())
242243

243244
# columns.inferred_type == 'mixed'
244245
# TODO add MultiIndex test

pandas/tests/test_groupby.py

+5
Original file line numberDiff line numberDiff line change
@@ -2052,6 +2052,11 @@ def test_cython_median(self):
20522052
exp = df.groupby(labels).agg(nanops.nanmedian)
20532053
assert_frame_equal(result, exp)
20542054

2055+
df = DataFrame(np.random.randn(1000, 5))
2056+
rs = df.groupby(labels).agg(np.median)
2057+
xp = df.groupby(labels).median()
2058+
assert_frame_equal(rs, xp)
2059+
20552060
def test_groupby_categorical_no_compress(self):
20562061
data = Series(np.random.randn(9))
20572062

pandas/tools/plotting.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1202,6 +1202,7 @@ def __init__(self, data, **kwargs):
12021202
self.tickoffset = 0.25
12031203
else:
12041204
self.tickoffset = 0.375
1205+
self.bar_width = 0.5
12051206
MPLPlot.__init__(self, data, **kwargs)
12061207

12071208
def _args_adjust(self):
@@ -1241,12 +1242,12 @@ def _make_plot(self):
12411242

12421243
if self.subplots:
12431244
ax = self._get_ax(i) # self.axes[i]
1244-
rect = bar_f(ax, self.ax_pos, y, 0.5, start=pos_prior, **kwds)
1245+
rect = bar_f(ax, self.ax_pos, y, self.bar_width, start=pos_prior, **kwds)
12451246
ax.set_title(label)
12461247
elif self.stacked:
12471248
mask = y > 0
12481249
start = np.where(mask, pos_prior, neg_prior)
1249-
rect = bar_f(ax, self.ax_pos, y, 0.5, start=start,
1250+
rect = bar_f(ax, self.ax_pos, y, self.bar_width, start=start,
12501251
label=label, **kwds)
12511252
pos_prior = pos_prior + np.where(mask, y, 0)
12521253
neg_prior = neg_prior + np.where(mask, 0, y)

0 commit comments

Comments
 (0)