Skip to content

Commit 0d81f0a

Browse files
committed
TST: add unit tests for PR #965, agg by level, close #934
1 parent d3d43a7 commit 0d81f0a

File tree

6 files changed

+65
-22
lines changed

6 files changed

+65
-22
lines changed

pandas/core/frame.py

+7 -7
Original file line number | Diff line number | Diff line change
@@ -3586,23 +3586,23 @@ def mad(self, axis=0, skipna=True, level=None):
35863586
demeaned = frame.sub(frame.mean(axis=1), axis=0)
35873587
return np.abs(demeaned).mean(axis=axis, skipna=skipna)
35883588

3589-
@Substitution(name='unbiased variance', shortname='var',
3589+
@Substitution(name='variance', shortname='var',
35903590
na_action=_doc_exclude_na, extras='')
35913591
@Appender(_stat_doc)
35923592
def var(self, axis=0, skipna=True, level=None, ddof=1):
35933593
if level is not None:
35943594
return self._agg_by_level('var', axis=axis, level=level,
3595-
skipna=skipna)
3595+
skipna=skipna, ddof=ddof)
35963596
return self._reduce(nanops.nanvar, axis=axis, skipna=skipna,
35973597
numeric_only=None, ddof=ddof)
35983598

3599-
@Substitution(name='unbiased standard deviation', shortname='std',
3599+
@Substitution(name='standard deviation', shortname='std',
36003600
na_action=_doc_exclude_na, extras='')
36013601
@Appender(_stat_doc)
36023602
def std(self, axis=0, skipna=True, level=None, ddof=1):
36033603
if level is not None:
36043604
return self._agg_by_level('std', axis=axis, level=level,
3605-
skipna=skipna)
3605+
skipna=skipna, ddof=ddof)
36063606
return np.sqrt(self.var(axis=axis, skipna=skipna, ddof=ddof))
36073607

36083608
@Substitution(name='unbiased skewness', shortname='skew',
@@ -3615,12 +3615,12 @@ def skew(self, axis=0, skipna=True, level=None):
36153615
return self._reduce(nanops.nanskew, axis=axis, skipna=skipna,
36163616
numeric_only=None)
36173617

3618-
def _agg_by_level(self, name, axis=0, level=0, skipna=True):
3618+
def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwds):
36193619
grouped = self.groupby(level=level, axis=axis)
36203620
if hasattr(grouped, name) and skipna:
3621-
return getattr(grouped, name)()
3621+
return getattr(grouped, name)(**kwds)
36223622
method = getattr(type(self), name)
3623-
applyf = lambda x: method(x, axis=axis, skipna=skipna)
3623+
applyf = lambda x: method(x, axis=axis, skipna=skipna, **kwds)
36243624
return grouped.aggregate(applyf)
36253625

36263626
def _reduce(self, op, axis=0, skipna=True, numeric_only=None, **kwds):

pandas/core/groupby.py

+13 -4
Original file line number | Diff line number | Diff line change
@@ -263,21 +263,30 @@ def mean(self):
263263
f = lambda x: x.mean(axis=self.axis)
264264
return self._python_agg_general(f)
265265

266-
def std(self):
266+
def std(self, ddof=1):
267267
"""
268268
Compute standard deviation of groups, excluding missing values
269269
270270
For multiple groupings, the result index will be a MultiIndex
271271
"""
272-
return self._cython_agg_general('std')
272+
# todo, implement at cython level?
273+
if ddof == 1:
274+
return self._cython_agg_general('std')
275+
else:
276+
f = lambda x: x.std(ddof=ddof)
277+
return self._python_agg_general(f)
273278

274-
def var(self):
279+
def var(self, ddof=1):
275280
"""
276281
Compute variance of groups, excluding missing values
277282
278283
For multiple groupings, the result index will be a MultiIndex
279284
"""
280-
return self._cython_agg_general('var')
285+
if ddof == 1:
286+
return self._cython_agg_general('var')
287+
else:
288+
f = lambda x: x.var(ddof=ddof)
289+
return self._python_agg_general(f)
281290

282291
def size(self):
283292
"""

pandas/core/series.py

+9 -7
Original file line number | Diff line number | Diff line change
@@ -1005,22 +1005,24 @@ def max(self, axis=None, out=None, skipna=True, level=None):
10051005
return self._agg_by_level('max', level=level, skipna=skipna)
10061006
return nanops.nanmax(self.values, skipna=skipna)
10071007

1008-
@Substitution(name='unbiased standard deviation', shortname='stdev',
1008+
@Substitution(name='standard deviation', shortname='stdev',
10091009
na_action=_doc_exclude_na, extras='')
10101010
@Appender(_stat_doc)
10111011
def std(self, axis=None, dtype=None, out=None, ddof=1, skipna=True,
10121012
level=None):
10131013
if level is not None:
1014-
return self._agg_by_level('std', level=level, skipna=skipna)
1014+
return self._agg_by_level('std', level=level, skipna=skipna,
1015+
ddof=ddof)
10151016
return np.sqrt(nanops.nanvar(self.values, skipna=skipna, ddof=ddof))
10161017

1017-
@Substitution(name='unbiased variance', shortname='var',
1018+
@Substitution(name='variance', shortname='var',
10181019
na_action=_doc_exclude_na, extras='')
10191020
@Appender(_stat_doc)
10201021
def var(self, axis=None, dtype=None, out=None, ddof=1, skipna=True,
10211022
level=None):
10221023
if level is not None:
1023-
return self._agg_by_level('var', level=level, skipna=skipna)
1024+
return self._agg_by_level('var', level=level, skipna=skipna,
1025+
ddof=ddof)
10241026
return nanops.nanvar(self.values, skipna=skipna, ddof=ddof)
10251027

10261028
@Substitution(name='unbiased skewness', shortname='skew',
@@ -1032,12 +1034,12 @@ def skew(self, skipna=True, level=None):
10321034

10331035
return nanops.nanskew(self.values, skipna=skipna)
10341036

1035-
def _agg_by_level(self, name, level=0, skipna=True):
1037+
def _agg_by_level(self, name, level=0, skipna=True, **kwds):
10361038
grouped = self.groupby(level=level)
10371039
if hasattr(grouped, name) and skipna:
1038-
return getattr(grouped, name)()
1040+
return getattr(grouped, name)(**kwds)
10391041
method = getattr(type(self), name)
1040-
applyf = lambda x: method(x, skipna=skipna)
1042+
applyf = lambda x: method(x, skipna=skipna, **kwds)
10411043
return grouped.aggregate(applyf)
10421044

10431045
def idxmin(self, axis=None, out=None, skipna=True):

pandas/tests/test_frame.py

+9 -2
Original file line number | Diff line number | Diff line change
@@ -4293,14 +4293,21 @@ def test_mad(self):
42934293
f = lambda x: np.abs(x - x.mean()).mean()
42944294
self._check_stat_op('mad', f)
42954295

4296-
def test_var(self):
4296+
def test_var_std(self):
42974297
alt = lambda x: np.var(x, ddof=1)
42984298
self._check_stat_op('var', alt)
42994299

4300-
def test_std(self):
43014300
alt = lambda x: np.std(x, ddof=1)
43024301
self._check_stat_op('std', alt)
43034302

4303+
result = self.tsframe.std(ddof=4)
4304+
expected = self.tsframe.apply(lambda x: x.std(ddof=4))
4305+
assert_almost_equal(result, expected)
4306+
4307+
result = self.tsframe.var(ddof=4)
4308+
expected = self.tsframe.apply(lambda x: x.var(ddof=4))
4309+
assert_almost_equal(result, expected)
4310+
43044311
def test_skew(self):
43054312
from scipy.stats import skew
43064313

pandas/tests/test_multilevel.py

+18
Original file line number | Diff line number | Diff line change
@@ -930,6 +930,24 @@ def test_frame_group_ops(self):
930930

931931
assert_frame_equal(leftside, rightside)
932932

933+
def test_std_var_pass_ddof(self):
934+
index = MultiIndex.from_arrays([np.arange(5).repeat(10),
935+
np.tile(np.arange(10), 5)])
936+
df = DataFrame(np.random.randn(len(index), 5), index=index)
937+
938+
for meth in ['var', 'std']:
939+
ddof = 4
940+
alt = lambda x: getattr(x, meth)(ddof=ddof)
941+
942+
result = getattr(df[0], meth)(level=0, ddof=ddof)
943+
expected = df[0].groupby(level=0).agg(alt)
944+
assert_series_equal(result, expected)
945+
946+
result = getattr(df, meth)(level=0, ddof=ddof)
947+
expected = df.groupby(level=0).agg(alt)
948+
assert_frame_equal(result, expected)
949+
950+
933951
def test_frame_series_agg_multiple_levels(self):
934952
result = self.ymd.sum(level=['year', 'month'])
935953
expected = self.ymd.groupby(level=['year', 'month']).sum()

pandas/tests/test_series.py

+9 -2
Original file line number | Diff line number | Diff line change
@@ -880,14 +880,21 @@ def test_min(self):
880880
def test_max(self):
881881
self._check_stat_op('max', np.max, check_objects=True)
882882

883-
def test_std(self):
883+
def test_var_std(self):
884884
alt = lambda x: np.std(x, ddof=1)
885885
self._check_stat_op('std', alt)
886886

887-
def test_var(self):
888887
alt = lambda x: np.var(x, ddof=1)
889888
self._check_stat_op('var', alt)
890889

890+
result = self.ts.std(ddof=4)
891+
expected = np.std(self.ts.values, ddof=4)
892+
assert_almost_equal(result, expected)
893+
894+
result = self.ts.var(ddof=4)
895+
expected = np.var(self.ts.values, ddof=4)
896+
assert_almost_equal(result, expected)
897+
891898
def test_skew(self):
892899
from scipy.stats import skew
893900
alt =lambda x: skew(x, bias=False)

0 commit comments

Comments
 (0)