|
8 | 8 | import numpy as np
|
9 | 9 | import random
|
10 | 10 |
|
11 |
| -from pandas.compat import range, lrange, lzip, zip |
| 11 | +from pandas.compat import range, lrange, lzip, zip, StringIO |
12 | 12 | from pandas import compat, _np_version_under1p7
|
13 | 13 | from pandas.tseries.index import DatetimeIndex
|
14 | 14 | from pandas.tools.merge import merge, concat, ordered_merge, MergeError
|
15 | 15 | from pandas.util.testing import (assert_frame_equal, assert_series_equal,
|
16 | 16 | assert_almost_equal, rands,
|
17 | 17 | makeCustomDataframe as mkdf,
|
18 | 18 | assertRaisesRegexp)
|
19 |
| -from pandas import isnull, DataFrame, Index, MultiIndex, Panel, Series, date_range |
| 19 | +from pandas import isnull, DataFrame, Index, MultiIndex, Panel, Series, date_range, read_table |
20 | 20 | import pandas.algos as algos
|
21 | 21 | import pandas.util.testing as tm
|
22 | 22 |
|
@@ -1025,6 +1025,98 @@ def test_int64_overflow_issues(self):
|
1025 | 1025 | result = merge(df1, df2, how='outer')
|
1026 | 1026 | self.assertTrue(len(result) == 2000)
|
1027 | 1027 |
|
def test_join_multi_levels(self):
    """Join a single-level-indexed frame with a MultiIndex-ed frame (GH 3662).

    ``household`` is indexed by ``household_id``; ``portfolio`` is indexed
    by ``(household_id, asset_id)``.  The test checks that:

    * an inner ``DataFrame.join`` matches a hand-built expected frame,
    * the same result is obtained via ``merge`` on the reset indexes,
    * an outer join additionally yields the ``(4, NaN)`` portfolio row,
    * ``join`` raises ``ValueError`` in the two "invalid" setups below.
    """
    # GH 3662
    # merge multi-levels

    household = DataFrame(
        dict(household_id=[1, 2, 3],
             male=[0, 1, 0],
             wealth=[196087.3, 316478.7, 294750]),
        columns=['household_id', 'male', 'wealth']
    ).set_index('household_id')
    portfolio = DataFrame(
        dict(household_id=[1, 2, 2, 3, 3, 3, 4],
             asset_id=["nl0000301109", "nl0000289783", "gb00b03mlx29",
                       "gb00b03mlx29", "lu0197800237", "nl0000289965",
                       np.nan],
             name=["ABN Amro", "Robeco", "Royal Dutch Shell",
                   "Royal Dutch Shell", "AAB Eastern Europe Equity Fund",
                   "Postbank BioTech Fonds", np.nan],
             share=[1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0]),
        columns=['household_id', 'asset_id', 'name', 'share']
    ).set_index(['household_id', 'asset_id'])

    result = household.join(portfolio, how='inner')
    expected = DataFrame(
        dict(male=[0, 1, 1, 0, 0, 0],
             wealth=[196087.3, 316478.7, 316478.7,
                     294750.0, 294750.0, 294750.0],
             name=['ABN Amro', 'Robeco', 'Royal Dutch Shell',
                   'Royal Dutch Shell', 'AAB Eastern Europe Equity Fund',
                   'Postbank BioTech Fonds'],
             share=[1.00, 0.40, 0.60, 0.15, 0.60, 0.25],
             household_id=[1, 2, 2, 3, 3, 3],
             asset_id=['nl0000301109', 'nl0000289783', 'gb00b03mlx29',
                       'gb00b03mlx29', 'lu0197800237', 'nl0000289965']),
    ).set_index(['household_id', 'asset_id']).reindex(
        columns=['male', 'wealth', 'name', 'share'])
    # The original asserted this comparison twice in a row; once suffices.
    assert_frame_equal(result, expected)

    # equivalency: the same inner join spelled as a column-based merge
    result2 = merge(household.reset_index(), portfolio.reset_index(),
                    on=['household_id'],
                    how='inner').set_index(['household_id', 'asset_id'])
    assert_frame_equal(result2, expected)

    # outer join: the inner rows plus the portfolio-only (4, NaN) entry,
    # for which only ``share`` is populated
    result = household.join(portfolio, how='outer')
    extra_row = DataFrame(
        dict(share=[1.00]),
        index=MultiIndex.from_tuples([(4, np.nan)],
                                     names=['household_id', 'asset_id']))
    expected = concat([expected, extra_row],
                      axis=0).reindex(columns=expected.columns)
    assert_frame_equal(result, expected)

    # invalid cases
    # after the rename, household's index name is 'foo', which is not one
    # of portfolio's level names
    household.index.name = 'foo'

    def join_renamed_household():
        household.join(portfolio, how='inner')
    self.assertRaises(ValueError, join_renamed_household)

    portfolio2 = portfolio.copy()
    # NOTE(review): the return value of set_names is discarded here.  Unless
    # set_names mutates the index in place, portfolio2 keeps its original
    # level names and the ValueError asserted below may originate from
    # something other than mismatched level names -- TODO confirm the
    # intent before changing this line (assigning the result could change
    # which exception is raised).
    portfolio2.index.set_names(['household_id', 'foo'])

    def join_two_multi_indexed():
        portfolio2.join(portfolio, how='inner')
    self.assertRaises(ValueError, join_two_multi_indexed)
def test_join_multi_levels2(self):
    """Joining two MultiIndex-ed frames that share one level (GH6360).

    ``household`` is indexed by ``(household_id, asset_id)`` and
    ``log_return`` by ``(asset_id, t)``.  The test asserts that
    ``DataFrame.join`` raises ``NotImplementedError`` for both the inner
    and the outer case, and checks that the inner result can be produced
    through ``merge`` on ``asset_id`` after resetting both indexes.
    """
    # some more advanced merges
    # GH6360
    household_keys = ['household_id', 'asset_id']
    household_data = {
        'household_id': [1, 2, 2, 3, 3, 3, 4],
        'asset_id': ["nl0000301109", "nl0000301109", "gb00b03mlx29",
                     "gb00b03mlx29", "lu0197800237", "nl0000289965",
                     np.nan],
        'share': [1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0],
    }
    household = DataFrame(household_data,
                          columns=['household_id', 'asset_id', 'share'])
    household = household.set_index(household_keys)

    log_return_data = {
        'asset_id': ["gb00b03mlx29", "gb00b03mlx29", "gb00b03mlx29",
                     "lu0197800237", "lu0197800237"],
        't': [233, 234, 235, 180, 181],
        'log_return': [.09604978, -.06524096, .03532373,
                       .03025441, .036997],
    }
    log_return = DataFrame(log_return_data).set_index(["asset_id", "t"])

    full_keys = ["household_id", "asset_id", "t"]
    inner_expected = DataFrame({
        'household_id': [2, 2, 2, 3, 3, 3, 3, 3],
        'asset_id': ["gb00b03mlx29", "gb00b03mlx29", "gb00b03mlx29",
                     "gb00b03mlx29", "gb00b03mlx29", "gb00b03mlx29",
                     "lu0197800237", "lu0197800237"],
        't': [233, 234, 235, 233, 234, 235, 180, 181],
        'share': [0.6, 0.6, 0.6, 0.15, 0.15, 0.15, 0.6, 0.6],
        'log_return': [.09604978, -.06524096, .03532373, .09604978,
                       -.06524096, .03532373, .03025441, .036997],
    })
    inner_expected = inner_expected.set_index(full_keys)
    inner_expected = inner_expected.reindex(columns=['share', 'log_return'])

    # the direct join of the two multi-indexed frames is expected to raise
    self.assertRaises(NotImplementedError,
                      household.join, log_return, how='inner')

    # this is the equivalency
    merged = merge(household.reset_index(), log_return.reset_index(),
                   on=['asset_id'], how='inner')
    result = merged.set_index(['household_id', 'asset_id', 't'])
    assert_frame_equal(result, inner_expected)

    # Frame describing the outer-join outcome; it is built here but not
    # compared against anything in this test.
    outer_expected = DataFrame({
        'household_id': [1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4],
        'asset_id': ["nl0000301109", "nl0000289783", "gb00b03mlx29",
                     "gb00b03mlx29", "gb00b03mlx29", "gb00b03mlx29",
                     "gb00b03mlx29", "gb00b03mlx29", "lu0197800237",
                     "lu0197800237", "nl0000289965", None],
        't': [None, None, 233, 234, 235, 233, 234, 235, 180, 181,
              None, None],
        'share': [1.0, 0.4, 0.6, 0.6, 0.6, 0.15, 0.15, 0.15, 0.6, 0.6,
                  0.25, 1.0],
        'log_return': [None, None, .09604978, -.06524096, .03532373,
                       .09604978, -.06524096, .03532373, .03025441,
                       .036997, None, None],
    }).set_index(full_keys)

    self.assertRaises(NotImplementedError,
                      household.join, log_return, how='outer')
1028 | 1120 | def _check_join(left, right, result, join_col, how='left',
|
1029 | 1121 | lsuffix='_x', rsuffix='_y'):
|
1030 | 1122 |
|
|
0 commit comments