Skip to content

Commit 68428d6

Browse files
committed
API: disallow renamed nested-dicts
1 parent 83238ed commit 68428d6

File tree

6 files changed

+88
-43
lines changed

6 files changed

+88
-43
lines changed

pandas/core/base.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,10 @@ def _selected_obj(self):
302302
else:
303303
return self.obj[self._selection]
304304

305+
@cache_readonly
306+
def ndim(self):
307+
return self._selected_obj.ndim
308+
305309
@cache_readonly
306310
def _obj_with_exclusions(self):
307311
if self._selection is not None and isinstance(self.obj,
@@ -438,14 +442,18 @@ def _aggregate(self, arg, *args, **kwargs):
438442
if self._selection is not None:
439443
subset = obj
440444

445+
ndim = 1 if len(self._selection_list) == 1 else 2
441446
for fname, agg_how in compat.iteritems(arg):
442-
colg = self._gotitem(self._selection, ndim=1,
447+
colg = self._gotitem(self._selection, ndim=ndim,
443448
subset=subset)
444449
result[fname] = colg.aggregate(agg_how, _level=None)
445450
keys.append(fname)
446451
else:
447452
for col, agg_how in compat.iteritems(arg):
448453
colg = self._gotitem(col, ndim=1)
454+
if colg.ndim != 1:
455+
raise ValueError("nested dictionary is ambiguous"
456+
"in aggregation")
449457
result[col] = colg.aggregate(agg_how, _level=_level)
450458
keys.append(col)
451459

pandas/core/window.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def _convert_freq(self, how=None):
7676
"to passing to a window function", FutureWarning,
7777
stacklevel=6)
7878

79-
obj = obj.resample(self.freq).aggregate(how or 'upsample')
79+
obj = obj.resample(self.freq).aggregate(how or 'asfreq')
8080
return obj
8181

8282
def _create_blocks(self, how):

pandas/tests/test_groupby.py

+9-14
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,6 @@
3434
from numpy.testing import assert_equal
3535

3636

37-
def compare_frame_like(result, expected):
38-
# if we are using dicts, the ordering is not guaranteed
39-
assert_frame_equal(result.reindex_like(expected), expected)
40-
41-
4237
class TestGroupBy(tm.TestCase):
4338

4439
_multiprocess_can_split_ = True
@@ -1484,7 +1479,7 @@ def test_aggregate_api_consistency(self):
14841479
expected = pd.concat([d_sum, d_mean],
14851480
axis=1)
14861481
expected.columns = ['sum', 'mean']
1487-
compare_frame_like(result, expected)
1482+
assert_frame_equal(result, expected, check_like=True)
14881483

14891484
result = grouped.agg([np.sum, np.mean])
14901485
expected = pd.concat([c_sum,
@@ -1494,7 +1489,7 @@ def test_aggregate_api_consistency(self):
14941489
axis=1)
14951490
expected.columns = MultiIndex.from_product([['C', 'D'],
14961491
['sum', 'mean']])
1497-
compare_frame_like(result, expected)
1492+
assert_frame_equal(result, expected, check_like=True)
14981493

14991494
result = grouped[['D', 'C']].agg([np.sum, np.mean])
15001495
expected = pd.concat([d_sum,
@@ -1504,18 +1499,18 @@ def test_aggregate_api_consistency(self):
15041499
axis=1)
15051500
expected.columns = MultiIndex.from_product([['D', 'C'],
15061501
['sum', 'mean']])
1507-
compare_frame_like(result, expected)
1502+
assert_frame_equal(result, expected, check_like=True)
15081503

1509-
import pdb; pdb.set_trace()
1510-
result = grouped[['D', 'C']].agg({'r': np.sum, 'r2': np.mean})
1504+
result = grouped[['D', 'C']].agg(OrderedDict([('r', np.sum),
1505+
('r2', np.mean)]))
15111506
expected = pd.concat([d_sum,
1512-
d_mean,
15131507
c_sum,
1508+
d_mean,
15141509
c_mean],
15151510
axis=1)
15161511
expected.columns = MultiIndex.from_product([['r', 'r2'],
1517-
['sum', 'mean']])
1518-
compare_frame_like(result, expected)
1512+
['D', 'C']])
1513+
assert_frame_equal(result, expected, check_like=True)
15191514

15201515
def test_multi_iter(self):
15211516
s = Series(np.arange(6))
@@ -5489,7 +5484,7 @@ def test_tab_completion(self):
54895484
'cumprod', 'tail', 'resample', 'cummin', 'fillna', 'cumsum',
54905485
'cumcount', 'all', 'shift', 'skew', 'bfill', 'ffill', 'take',
54915486
'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', 'cov',
5492-
'dtypes', 'diff', 'idxmax', 'idxmin'])
5487+
'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin'])
54935488
self.assertEqual(results, expected)
54945489

54955490
def test_lexsort_indexer(self):

pandas/tests/test_window.py

+25-9
Original file line numberDiff line numberDiff line change
@@ -121,10 +121,6 @@ def test_agg(self):
121121
b_std = r['B'].std()
122122
b_sum = r['B'].sum()
123123

124-
def compare(result, expected):
125-
# if we are using dicts, the ordering is not guaranteed
126-
assert_frame_equal(result.reindex_like(expected), expected)
127-
128124
result = r.aggregate([np.mean, np.std])
129125
expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
130126
expected.columns = pd.MultiIndex.from_product([['A', 'B'], ['mean',
@@ -134,7 +130,7 @@ def compare(result, expected):
134130
result = r.aggregate({'A': np.mean, 'B': np.std})
135131

136132
expected = pd.concat([a_mean, b_std], axis=1)
137-
compare(result, expected)
133+
assert_frame_equal(result, expected, check_like=True)
138134

139135
result = r.aggregate({'A': ['mean', 'std']})
140136
expected = pd.concat([a_mean, a_std], axis=1)
@@ -151,7 +147,7 @@ def compare(result, expected):
151147
expected = pd.concat([a_mean, a_sum], axis=1)
152148
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ('A',
153149
'sum')])
154-
compare(result, expected)
150+
assert_frame_equal(result, expected, check_like=True)
155151

156152
result = r.aggregate({'A': {'mean': 'mean',
157153
'sum': 'sum'},
@@ -160,19 +156,19 @@ def compare(result, expected):
160156
expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
161157
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), (
162158
'A', 'sum'), ('B', 'mean2'), ('B', 'sum2')])
163-
compare(result, expected)
159+
assert_frame_equal(result, expected, check_like=True)
164160

165161
result = r.aggregate({'A': ['mean', 'std'], 'B': ['mean', 'std']})
166162
expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
167163
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), (
168164
'A', 'std'), ('B', 'mean'), ('B', 'std')])
169-
compare(result, expected)
165+
assert_frame_equal(result, expected, check_like=True)
170166

171167
# passed lambda
172168
result = r.agg({'A': np.sum, 'B': lambda x: np.std(x, ddof=1)})
173169
rcustom = r['B'].apply(lambda x: np.std(x, ddof=1))
174170
expected = pd.concat([a_sum, rcustom], axis=1)
175-
compare(result, expected)
171+
assert_frame_equal(result, expected, check_like=True)
176172

177173
def test_agg_consistency(self):
178174

@@ -191,6 +187,26 @@ def test_agg_consistency(self):
191187
expected = pd.MultiIndex.from_tuples([('A', 'sum'), ('A', 'mean')])
192188
tm.assert_index_equal(result, expected)
193189

190+
def test_agg_nested_dicts(self):
191+
192+
# API change for disallowing these types of nested dicts
193+
df = DataFrame({'A': range(5), 'B': range(0, 10, 2)})
194+
r = df.rolling(window=3)
195+
196+
def f():
197+
r.aggregate({'r1': {'A': ['mean', 'sum']},
198+
'r2': {'B': ['mean', 'sum']}})
199+
200+
self.assertRaises(ValueError, f)
201+
202+
result = r.agg({'A': {'ra': ['mean', 'std']},
203+
'B': {'rb': ['mean', 'std']}})
204+
expected = pd.concat([r['A'].mean(), r['A'].std(), r['B'].mean(),
205+
r['B'].std()], axis=1)
206+
expected.columns = pd.MultiIndex.from_tuples([('A', 'ra', 'mean'), (
207+
'A', 'ra', 'std'), ('B', 'rb', 'mean'), ('B', 'rb', 'std')])
208+
assert_frame_equal(result, expected, check_like=True)
209+
194210
def test_window_with_args(self):
195211
tm._skip_if_no_scipy()
196212

pandas/tseries/tests/test_resample.py

+29-14
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from datetime import datetime, timedelta
44
from functools import partial
55

6-
from pandas.compat import range, lrange, zip, product
6+
from pandas.compat import range, lrange, zip, product, OrderedDict
77
import numpy as np
88

99
from pandas import (Series, DataFrame, Panel, Index, isnull,
@@ -31,11 +31,6 @@
3131
bday = BDay()
3232

3333

34-
def compare_frame_like(result, expected):
35-
# if we are using dicts, the ordering is not guaranteed
36-
assert_frame_equal(result.reindex_like(expected), expected)
37-
38-
3934
class TestResampleAPI(tm.TestCase):
4035
_multiprocess_can_split_ = True
4136

@@ -211,7 +206,7 @@ def test_downsample_but_actually_upsampling(self):
211206

212207
# this is reindex / asfreq
213208
rng = pd.date_range('1/1/2012', periods=100, freq='S')
214-
ts = pd.Series(np.arange(len(rng)), index=rng)
209+
ts = pd.Series(np.arange(len(rng), dtype='int64'), index=rng)
215210
result = ts.resample('20s').asfreq()
216211
expected = Series([0, 20, 40, 60, 80],
217212
index=pd.date_range('2012-01-01 00:00:00',
@@ -271,7 +266,7 @@ def test_agg(self):
271266
for t in [r, g]:
272267
result = t.aggregate({'A': np.mean,
273268
'B': np.std})
274-
compare_frame_like(result, expected)
269+
assert_frame_equal(result, expected, check_like=True)
275270

276271
expected = pd.concat([a_mean, a_std], axis=1)
277272
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
@@ -291,7 +286,7 @@ def test_agg(self):
291286
('A', 'sum')])
292287
for t in [r, g]:
293288
result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}})
294-
compare_frame_like(result, expected)
289+
assert_frame_equal(result, expected, check_like=True)
295290

296291
expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
297292
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
@@ -301,7 +296,7 @@ def test_agg(self):
301296
for t in [r, g]:
302297
result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'},
303298
'B': {'mean2': 'mean', 'sum2': 'sum'}})
304-
compare_frame_like(result, expected)
299+
assert_frame_equal(result, expected, check_like=True)
305300

306301
expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
307302
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
@@ -311,7 +306,7 @@ def test_agg(self):
311306
for t in [r, g]:
312307
result = t.aggregate({'A': ['mean', 'std'],
313308
'B': ['mean', 'std']})
314-
compare_frame_like(result, expected)
309+
assert_frame_equal(result, expected, check_like=True)
315310

316311
expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
317312
expected.columns = pd.MultiIndex.from_tuples([('r1', 'A', 'mean'),
@@ -338,19 +333,39 @@ def test_agg_misc(self):
338333
'B': lambda x: np.std(x, ddof=1)})
339334
rcustom = t['B'].apply(lambda x: np.std(x, ddof=1))
340335
expected = pd.concat([r['A'].sum(), rcustom], axis=1)
341-
compare_frame_like(result, expected)
336+
assert_frame_equal(result, expected, check_like=True)
342337

343338
# misc
339+
expected = pd.concat([t['A'].sum(),
340+
t['B'].sum(),
341+
t['A'].mean(),
342+
t['B'].mean()],
343+
axis=1)
344+
expected.columns = pd.MultiIndex.from_tuples([('result1', 'A'),
345+
('result1', 'B'),
346+
('result2', 'A'),
347+
('result2', 'B')])
344348
for t in [r, g]:
345-
t[['A', 'B']].agg({'result1': np.sum, 'result2': np.mean})
349+
result = t[['A', 'B']].agg(OrderedDict([('result1', np.sum),
350+
('result2', np.mean)]))
351+
assert_frame_equal(result, expected, check_like=True)
346352

347353
for t in [r, g]:
348354
t.agg({'A': ['sum', 'std'], 'B': ['mean', 'std']})
349355

356+
# TODO: the expected result of this aggregation is still undecided
357+
import pdb; pdb.set_trace()
350358
for t in [r, g]:
351359
t[['A', 'B']].agg({'A': ['sum', 'std'],
352360
'B': ['mean', 'std']})
353361

362+
# errors
363+
for t in [r, g]:
364+
r[['A']].agg({'A': ['sum', 'std'], 'B': ['mean', 'std']})
365+
366+
for t in [r, g]:
367+
r['A'].agg({'A': ['sum', 'std'], 'B': ['mean', 'std']})
368+
354369

355370
class TestResample(tm.TestCase):
356371
_multiprocess_can_split_ = True
@@ -515,7 +530,7 @@ def test_resample_with_timedeltas(self):
515530

516531
def test_resample_single_period_timedelta(self):
517532

518-
s = Series(range(5), index=pd.timedelta_range(
533+
s = Series(list(range(5)), index=pd.timedelta_range(
519534
'1 day', freq='s', periods=5))
520535
result = s.resample('2s').sum()
521536
expected = Series([1, 5, 4], index=pd.timedelta_range(

pandas/util/testing.py

+15-4
Original file line numberDiff line numberDiff line change
@@ -984,6 +984,7 @@ def assert_frame_equal(left, right, check_dtype=True,
984984
by_blocks=False,
985985
check_exact=False,
986986
check_datetimelike_compat=False,
987+
check_like=False,
987988
obj='DataFrame'):
988989

989990
"""Check that left and right DataFrame are equal.
@@ -1014,6 +1015,8 @@ def assert_frame_equal(left, right, check_dtype=True,
10141015
Whether to compare number exactly.
10151016
check_datetimelike_compat : bool, default False
10161017
Compare datetime-like which is comparable ignoring dtype.
1018+
check_like : bool, default False
1019+
If True, reindex left like right (via reindex_like) before comparing, so that differences in row/column ordering are ignored.
10171020
obj : str, default 'DataFrame'
10181021
Specify object name being compared, internally used to show appropriate
10191022
assertion message
@@ -1026,16 +1029,24 @@ def assert_frame_equal(left, right, check_dtype=True,
10261029
if check_frame_type:
10271030
assertIsInstance(left, type(right))
10281031

1032+
if check_like:
1033+
left, right = left.reindex_like(right), right
1034+
10291035
# shape comparison (row)
10301036
if left.shape[0] != right.shape[0]:
1031-
raise_assert_detail(obj, 'DataFrame shape (number of rows) are different',
1037+
raise_assert_detail(obj,
1038+
'DataFrame shape (number of rows) are different',
10321039
'{0}, {1}'.format(left.shape[0], left.index),
10331040
'{0}, {1}'.format(right.shape[0], right.index))
10341041
# shape comparison (columns)
10351042
if left.shape[1] != right.shape[1]:
1036-
raise_assert_detail(obj, 'DataFrame shape (number of columns) are different',
1037-
'{0}, {1}'.format(left.shape[1], left.columns),
1038-
'{0}, {1}'.format(right.shape[1], right.columns))
1043+
raise_assert_detail(obj,
1044+
'DataFrame shape (number of columns) '
1045+
'are different',
1046+
'{0}, {1}'.format(left.shape[1],
1047+
left.columns),
1048+
'{0}, {1}'.format(right.shape[1],
1049+
right.columns))
10391050

10401051
# index comparison
10411052
assert_index_equal(left.index, right.index, exact=check_index_type,

0 commit comments

Comments
 (0)