From f679f971e201d99a1cef1939594c87ecb4635427 Mon Sep 17 00:00:00 2001 From: harisbal Date: Sat, 4 Mar 2017 12:31:50 +0000 Subject: [PATCH 1/4] Add support for MultiIndex operations Enable operations (e.g. multiply) on MultiIndexed objects when the join is not ambiguous --- pandas/indexes/base.py | 77 ++++++++++++++++++++++++++------ pandas/tests/tools/test_merge.py | 4 +- 2 files changed, 66 insertions(+), 15 deletions(-) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 5d43d2d32af67..777442051354b 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2896,27 +2896,38 @@ def join(self, other, how='left', level=None, return_indexers=False): def _join_multi(self, other, how, return_indexers=True): from .multi import MultiIndex - self_is_mi = isinstance(self, MultiIndex) - other_is_mi = isinstance(other, MultiIndex) # figure out join names self_names = [n for n in self.names if n is not None] other_names = [n for n in other.names if n is not None] overlap = list(set(self_names) & set(other_names)) + # Drop the non matching levels + drop_lvls = [l for l in other_names if l not in overlap] + other = other.droplevel(drop_lvls) + + self_is_mi = isinstance(self, MultiIndex) + other_is_mi = isinstance(other, MultiIndex) + # need at least 1 in common, but not more than 1 if not len(overlap): raise ValueError("cannot join with no level specified and no " "overlapping names") - if len(overlap) > 1: - raise NotImplementedError("merging with more than one level " - "overlap on a multi-index is not " - "implemented") - jl = overlap[0] - # make the indices into mi's that match - if not (self_is_mi and other_is_mi): + if self_is_mi and other_is_mi: + if other.is_unique: + # Join only when the other does not contain dupls + lindexer = self.get_indexer(self) + rindexer = other.get_indexer(other) + result = self, lindexer, rindexer + return result + else: + raise TypeError('Join on level between non-unique ' + 'MultiIndex objects is ambiguous') + else: +
jl = overlap[0] + # make the indices into mi's that match flip_order = False if self_is_mi: self, other = other, self @@ -2933,10 +2944,6 @@ def _join_multi(self, other, how, return_indexers=True): return result[0], result[2], result[1] return result - # 2 multi-indexes - raise NotImplementedError("merging with both multi-indexes is not " - "implemented") - def _join_non_unique(self, other, how='left', return_indexers=False): from pandas.tools.merge import _get_join_indexers @@ -3868,3 +3875,47 @@ def _trim_front(strings): def _validate_join_method(method): if method not in ['left', 'right', 'inner', 'outer']: raise ValueError('do not recognize join method %s' % method) + +if __name__ == '__main__': + import pandas as pd + # GH 3662 + # merge multi-levels + household = ( + pd.DataFrame( + dict(household_id=[1, 2, 3], + male=[0, 1, 0], + wealth=[196087.3, 316478.7, 294750]), + columns=['household_id', 'male', 'wealth']) + .set_index(['household_id', 'male'])) + portfolio = ( + pd.DataFrame( + dict(household_id=[1, 2, 2, 3, 3, 3, 4], + asset_id=["nl0000301109", "nl0000289783", "gb00b03mlx29", + "gb00b03mlx29", "lu0197800237", "nl0000289965", + np.nan], + name=["ABN Amro", "Robeco", "Royal Dutch Shell", + "Royal Dutch Shell", + "AAB Eastern Europe Equity Fund", + "Postbank BioTech Fonds", np.nan], + share=[1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0]), + columns=['household_id', 'asset_id', 'name', 'share']) + .set_index(['household_id', 'asset_id'])) + #result = household.join(portfolio, how='inner') + expected = ( + pd.DataFrame( + dict(male=[0, 1, 1, 0, 0, 0], + wealth=[196087.3, 316478.7, 316478.7, + 294750.0, 294750.0, 294750.0], + name=['ABN Amro', 'Robeco', 'Royal Dutch Shell', + 'Royal Dutch Shell', + 'AAB Eastern Europe Equity Fund', + 'Postbank BioTech Fonds'], + share=[1.00, 0.40, 0.60, 0.15, 0.60, 0.25], + household_id=[1, 2, 2, 3, 3, 3], + asset_id=['nl0000301109', 'nl0000289783', 'gb00b03mlx29', + 'gb00b03mlx29', 'lu0197800237', + 'nl0000289965'])) + 
.set_index(['household_id', 'asset_id']) + .reindex(columns=['male', 'wealth', 'name', 'share'])) + + print(portfolio.share.multiply(household.wealth)) diff --git a/pandas/tests/tools/test_merge.py b/pandas/tests/tools/test_merge.py index b3b5e7e29319b..44b3f56ea4b00 100644 --- a/pandas/tests/tools/test_merge.py +++ b/pandas/tests/tools/test_merge.py @@ -1213,7 +1213,7 @@ def test_join_multi_levels2(self): def f(): household.join(log_return, how='inner') - self.assertRaises(NotImplementedError, f) + self.assertRaises(TypeError, f) # this is the equivalency result = (merge(household.reset_index(), log_return.reset_index(), @@ -1241,4 +1241,4 @@ def f(): def f(): household.join(log_return, how='outer') - self.assertRaises(NotImplementedError, f) + self.assertRaises(TypeError, f) From 7eb0b575e6ca0546a4272c795c7475bdc05a52a1 Mon Sep 17 00:00:00 2001 From: harisbal Date: Tue, 7 Mar 2017 00:17:11 +0000 Subject: [PATCH 2/4] Tests Working --- .gitignore | 2 ++ pandas/indexes/base.py | 10 ++++----- pandas/tests/tools/test_merge.py | 36 +++++++++++++++++++++++++++++--- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index a509fcf736ea8..8445932cd68c0 100644 --- a/.gitignore +++ b/.gitignore @@ -103,3 +103,5 @@ doc/source/index.rst doc/build/html/index.html # Windows specific leftover: doc/tmp.sv + +Untitled\.ipynb diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 777442051354b..2aa04ce5d960a 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2904,7 +2904,6 @@ def _join_multi(self, other, how, return_indexers=True): # Drop the non matching levels drop_lvls = [l for l in other_names if l not in overlap] - other = other.droplevel(drop_lvls) self_is_mi = isinstance(self, MultiIndex) other_is_mi = isinstance(other, MultiIndex) @@ -2915,6 +2914,7 @@ def _join_multi(self, other, how, return_indexers=True): "overlapping names") if self_is_mi and other_is_mi: + other = other.droplevel(drop_lvls) if 
other.is_unique: # Join only when the other does not contain dupls lindexer = self.get_indexer(self) @@ -3879,14 +3879,14 @@ def _validate_join_method(method): if __name__ == '__main__': import pandas as pd # GH 3662 - # merge multi-levels +# merge multi-levels household = ( pd.DataFrame( dict(household_id=[1, 2, 3], male=[0, 1, 0], wealth=[196087.3, 316478.7, 294750]), columns=['household_id', 'male', 'wealth']) - .set_index(['household_id', 'male'])) + .set_index('household_id')) portfolio = ( pd.DataFrame( dict(household_id=[1, 2, 2, 3, 3, 3, 4], @@ -3900,7 +3900,6 @@ def _validate_join_method(method): share=[1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0]), columns=['household_id', 'asset_id', 'name', 'share']) .set_index(['household_id', 'asset_id'])) - #result = household.join(portfolio, how='inner') expected = ( pd.DataFrame( dict(male=[0, 1, 1, 0, 0, 0], @@ -3918,4 +3917,5 @@ def _validate_join_method(method): .set_index(['household_id', 'asset_id']) .reindex(columns=['male', 'wealth', 'name', 'share'])) - print(portfolio.share.multiply(household.wealth)) + result = household.join(portfolio, how='inner') + print(result) diff --git a/pandas/tests/tools/test_merge.py b/pandas/tests/tools/test_merge.py index 44b3f56ea4b00..85cbcb34db67b 100644 --- a/pandas/tests/tools/test_merge.py +++ b/pandas/tests/tools/test_merge.py @@ -1165,14 +1165,14 @@ def test_join_multi_levels(self): def f(): household.join(portfolio, how='inner') - self.assertRaises(ValueError, f) + self.assertRaises(TypeError, f) portfolio2 = portfolio.copy() portfolio2.index.set_names(['household_id', 'foo']) def f(): portfolio2.join(portfolio, how='inner') - self.assertRaises(ValueError, f) + self.assertRaises(TypeError, f) def test_join_multi_levels2(self): @@ -1215,7 +1215,7 @@ def f(): household.join(log_return, how='inner') self.assertRaises(TypeError, f) - # this is the equivalency + # this is equivalency the result = (merge(household.reset_index(), log_return.reset_index(), on=['asset_id'], 
how='inner') .set_index(['household_id', 'asset_id', 't'])) @@ -1242,3 +1242,33 @@ def f(): def f(): household.join(log_return, how='outer') self.assertRaises(TypeError, f) + + def test_join_multi_levels3(self): + matrix = ( + pd.DataFrame( + dict(Origin=[1, 1, 2, 2, 3], + Destination=[1, 2, 1, 3, 1], + Trips=[1987, 3647, 2470, 7521, 4296]), + columns=['Origin', 'Destination', 'Trips']) + .set_index(['Origin', 'Destination'])) + + distances = ( + pd.DataFrame( + dict(Origin=[1, 1, 2, 2, 3, 3], + Destination=[2, 3, 1, 3, 1, 2], + Distance=[100, 80, 90, 80, 70, 70]), + columns=['Origin', 'Destination', 'Distance']) + .set_index(['Origin', 'Destination'])) + + result = matrix.join(distances, how='left') + + expected = ( + pd.DataFrame( + dict(Origin=[1, 1, 2, 2, 3], + Destination=[1, 2, 1, 3, 1], + Trips=[1987, 3647, 2470, 7521, 4296], + Distance=[np.nan, 100, 90, 80, 70]), + columns=['Origin', 'Destination', 'Trips', 'Distance']) + .set_index(['Origin', 'Destination'])) + + assert_frame_equal(result, expected) From 9cade61c449f1d9f298af46e0f9a18b07b61177e Mon Sep 17 00:00:00 2001 From: harisbal Date: Fri, 7 Apr 2017 03:12:00 +0100 Subject: [PATCH 3/4] Working Needs cleanup --- pandas/indexes/base.py | 127 ++++++++++++++++++++++++++--------------- 1 file changed, 80 insertions(+), 47 deletions(-) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 2aa04ce5d960a..6be7580528e07 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2900,10 +2900,12 @@ def _join_multi(self, other, how, return_indexers=True): # figure out join names self_names = [n for n in self.names if n is not None] other_names = [n for n in other.names if n is not None] + overlap = list(set(self_names) & set(other_names)) # Drop the non matching levels - drop_lvls = [l for l in other_names if l not in overlap] + ldrop_lvls = [l for l in self_names if l not in overlap] + rdrop_lvls = [l for l in other_names if l not in overlap] self_is_mi = isinstance(self, MultiIndex) 
other_is_mi = isinstance(other, MultiIndex) @@ -2914,16 +2916,49 @@ def _join_multi(self, other, how, return_indexers=True): "overlapping names") if self_is_mi and other_is_mi: - other = other.droplevel(drop_lvls) - if other.is_unique: - # Join only when the other does not contain dupls - lindexer = self.get_indexer(self) - rindexer = other.get_indexer(other) - result = self, lindexer, rindexer - return result + self_tmp = self.droplevel(ldrop_lvls) + other_tmp = other.droplevel(rdrop_lvls) + + join_index, lidx, ridx = self_tmp.join(other_tmp, how=how, + return_indexers=True) + + # Append to the returned Index the non-overlapping levels + not_overlap = (set(self_names) ^ set(other_names)) + + #def _get_levels(): + if how == 'left': + ji = self + elif how == 'right': + ji = other else: - raise TypeError('Join on level between non-unique ' - 'MultiIndex objects is ambiguous') + ji = join_index + + new_levels = ji.levels + new_labels = ji.labels + new_names = ji.names + + if how == 'outer': + for n in not_overlap: + if n in self_names: + idx = lidx + lvls = self.levels[self_names.index(n)].values + lbls = self.labels[self_names.index(n)] + else: + idx = ridx + lvls = other.levels[other_names.index(n)].values + lbls = other.labels[other_names.index(n)] + + new_levels = new_levels.union([lvls]) + l = [lbls[i] if i!=-1 else -1 for i in idx] + new_labels = new_labels.union([l]) + + new_names = new_names.union([n]) + + join_index = MultiIndex(levels=new_levels, labels=new_labels, + names=new_names, verify_integrity=False) + + return join_index, lidx, ridx + else: jl = overlap[0] @@ -3878,44 +3913,42 @@ def _validate_join_method(method): if __name__ == '__main__': import pandas as pd - # GH 3662 -# merge multi-levels - household = ( + + matrix = ( pd.DataFrame( - dict(household_id=[1, 2, 3], - male=[0, 1, 0], - wealth=[196087.3, 316478.7, 294750]), - columns=['household_id', 'male', 'wealth']) - .set_index('household_id')) - portfolio = ( + dict(Origin=[1, 1, 2, 2, 3], + 
Destination=[1, 2, 1, 3, 4], + Period=['AM','PM','IP','AM','OP'], + TripPurp=['hbw', 'nhb', 'hbo', 'nhb', 'hbw'], + Trips=[1987, 3647, 2470, 4296, 4444]), + columns=['Origin', 'Destination', 'Period', 'TripPurp', 'Trips']) + .set_index(['Origin', 'Destination', 'Period', 'TripPurp'])) + + distances = ( pd.DataFrame( - dict(household_id=[1, 2, 2, 3, 3, 3, 4], - asset_id=["nl0000301109", "nl0000289783", "gb00b03mlx29", - "gb00b03mlx29", "lu0197800237", "nl0000289965", - np.nan], - name=["ABN Amro", "Robeco", "Royal Dutch Shell", - "Royal Dutch Shell", - "AAB Eastern Europe Equity Fund", - "Postbank BioTech Fonds", np.nan], - share=[1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0]), - columns=['household_id', 'asset_id', 'name', 'share']) - .set_index(['household_id', 'asset_id'])) + dict(Origin= [1, 1, 2, 2, 3, 3, 5], + Destination=[1, 2, 1, 2, 1, 2, 6], + Period=['AM','PM','IP','AM','OP','IP', 'AM'], + LinkType=['a', 'a', 'c', 'b', 'a', 'b', 'a'], + Distance=[100, 80, 90, 80, 75, 35, 55]), + columns=['Origin', 'Destination', 'Period', 'LinkType', 'Distance']) + .set_index(['Origin', 'Destination','Period', 'LinkType'])) + expected = ( pd.DataFrame( - dict(male=[0, 1, 1, 0, 0, 0], - wealth=[196087.3, 316478.7, 316478.7, - 294750.0, 294750.0, 294750.0], - name=['ABN Amro', 'Robeco', 'Royal Dutch Shell', - 'Royal Dutch Shell', - 'AAB Eastern Europe Equity Fund', - 'Postbank BioTech Fonds'], - share=[1.00, 0.40, 0.60, 0.15, 0.60, 0.25], - household_id=[1, 2, 2, 3, 3, 3], - asset_id=['nl0000301109', 'nl0000289783', 'gb00b03mlx29', - 'gb00b03mlx29', 'lu0197800237', - 'nl0000289965'])) - .set_index(['household_id', 'asset_id']) - .reindex(columns=['male', 'wealth', 'name', 'share'])) - - result = household.join(portfolio, how='inner') - print(result) + dict(Origin= [1, 1, 2, 2, 3], + Destination=[1, 2, 1, 3, 1], + + Period=['AM','PM','IP', 'AM', 'OP'], + Trips=[1987, 3647, 2470, 4296, 4444], + Distance=[100, 80, 90, np.nan, 75]), + columns=['Origin', 'Destination', 'Period', 'Trips', 
'Distance']) + .set_index(['Origin', 'Destination', 'Period'])) + + + print(matrix) + print(distances) + + result = matrix.join(distances, how='outer') + + print(result) \ No newline at end of file From 82a04b62a6ca8513be5ac7e68f0e4155a383d608 Mon Sep 17 00:00:00 2001 From: harisbal Date: Wed, 12 Apr 2017 00:49:05 +0100 Subject: [PATCH 4/4] merge multi-index with a multi-index Allow the join on multi-indexed objects --- pandas/indexes/base.py | 99 ++++++++++---------------------- pandas/tests/tools/test_merge.py | 67 +++++++++++---------- 2 files changed, 69 insertions(+), 97 deletions(-) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 6be7580528e07..470b2d2842119 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2897,15 +2897,33 @@ def join(self, other, how='left', level=None, return_indexers=False): def _join_multi(self, other, how, return_indexers=True): from .multi import MultiIndex + def _complete_join(new_lvls, new_lbls, new_nms): + for n in not_overlap: + if n in self_names: + idx = lidx + lvls = self.levels[self_names.index(n)].values + lbls = self.labels[self_names.index(n)] + else: + idx = ridx + lvls = other.levels[other_names.index(n)].values + lbls = other.labels[other_names.index(n)] + + new_lvls = new_lvls.union([lvls])  # extend the running argument + l = [lbls[i] if i!=-1 else -1 for i in idx] + new_lbls = new_lbls.union([l]) + + new_nms = new_nms.union([n]) + + return new_lvls, new_lbls, new_nms + # figure out join names self_names = [n for n in self.names if n is not None] other_names = [n for n in other.names if n is not None] - overlap = list(set(self_names) & set(other_names)) # Drop the non matching levels - ldrop_lvls = [l for l in self_names if l not in overlap] - rdrop_lvls = [l for l in other_names if l not in overlap] + ldrop_levels = [l for l in self_names if l not in overlap] + rdrop_levels = [l for l in other_names if l not in overlap] self_is_mi = isinstance(self, MultiIndex) other_is_mi = isinstance(other, MultiIndex) @@
-2916,8 +2934,8 @@ def _join_multi(self, other, how, return_indexers=True): "overlapping names") if self_is_mi and other_is_mi: - self_tmp = self.droplevel(ldrop_lvls) - other_tmp = other.droplevel(rdrop_lvls) + self_tmp = self.droplevel(ldrop_levels) + other_tmp = other.droplevel(rdrop_levels) join_index, lidx, ridx = self_tmp.join(other_tmp, how=how, return_indexers=True) @@ -2925,7 +2943,6 @@ def _join_multi(self, other, how, return_indexers=True): # Append to the returned Index the non-overlapping levels not_overlap = (set(self_names) ^ set(other_names)) - #def _get_levels(): if how == 'left': ji = self elif how == 'right': @@ -2933,27 +2950,15 @@ def _join_multi(self, other, how, return_indexers=True): else: ji = join_index - new_levels = ji.levels - new_labels = ji.labels - new_names = ji.names - if how == 'outer': - for n in not_overlap: - if n in self_names: - idx = lidx - lvls = self.levels[self_names.index(n)].values - lbls = self.labels[self_names.index(n)] - else: - idx = ridx - lvls = other.levels[other_names.index(n)].values - lbls = other.labels[other_names.index(n)] - - new_levels = new_levels.union([lvls]) - l = [lbls[i] if i!=-1 else -1 for i in idx] - new_labels = new_labels.union([l]) - - new_names = new_names.union([n]) - + new_levels, new_labels, new_names = _complete_join(ji.levels, + ji.labels, + ji.names) + else: + new_levels = ji.levels + new_labels = ji.labels + new_names = ji.names + join_index = MultiIndex(levels=new_levels, labels=new_labels, names=new_names, verify_integrity=False) @@ -3910,45 +3915,3 @@ def _trim_front(strings): def _validate_join_method(method): if method not in ['left', 'right', 'inner', 'outer']: raise ValueError('do not recognize join method %s' % method) - -if __name__ == '__main__': - import pandas as pd - - matrix = ( - pd.DataFrame( - dict(Origin=[1, 1, 2, 2, 3], - Destination=[1, 2, 1, 3, 4], - Period=['AM','PM','IP','AM','OP'], - TripPurp=['hbw', 'nhb', 'hbo', 'nhb', 'hbw'], - Trips=[1987, 3647, 2470, 4296, 
4444]), - columns=['Origin', 'Destination', 'Period', 'TripPurp', 'Trips']) - .set_index(['Origin', 'Destination', 'Period', 'TripPurp'])) - - distances = ( - pd.DataFrame( - dict(Origin= [1, 1, 2, 2, 3, 3, 5], - Destination=[1, 2, 1, 2, 1, 2, 6], - Period=['AM','PM','IP','AM','OP','IP', 'AM'], - LinkType=['a', 'a', 'c', 'b', 'a', 'b', 'a'], - Distance=[100, 80, 90, 80, 75, 35, 55]), - columns=['Origin', 'Destination', 'Period', 'LinkType', 'Distance']) - .set_index(['Origin', 'Destination','Period', 'LinkType'])) - - expected = ( - pd.DataFrame( - dict(Origin= [1, 1, 2, 2, 3], - Destination=[1, 2, 1, 3, 1], - - Period=['AM','PM','IP', 'AM', 'OP'], - Trips=[1987, 3647, 2470, 4296, 4444], - Distance=[100, 80, 90, np.nan, 75]), - columns=['Origin', 'Destination', 'Period', 'Trips', 'Distance']) - .set_index(['Origin', 'Destination', 'Period'])) - - - print(matrix) - print(distances) - - result = matrix.join(distances, how='outer') - - print(result) \ No newline at end of file diff --git a/pandas/tests/tools/test_merge.py b/pandas/tests/tools/test_merge.py index 85cbcb34db67b..040a1ac619a9d 100644 --- a/pandas/tests/tools/test_merge.py +++ b/pandas/tests/tools/test_merge.py @@ -1165,14 +1165,14 @@ def test_join_multi_levels(self): def f(): household.join(portfolio, how='inner') - self.assertRaises(TypeError, f) + self.assertRaises(ValueError, f) portfolio2 = portfolio.copy() portfolio2.index.set_names(['household_id', 'foo']) def f(): portfolio2.join(portfolio, how='inner') - self.assertRaises(TypeError, f) + self.assertRaises(ValueError, f) def test_join_multi_levels2(self): @@ -1211,10 +1211,6 @@ def test_join_multi_levels2(self): .set_index(["household_id", "asset_id", "t"]) .reindex(columns=['share', 'log_return'])) - def f(): - household.join(log_return, how='inner') - self.assertRaises(TypeError, f) - # this is equivalency the result = (merge(household.reset_index(), log_return.reset_index(), on=['asset_id'], how='inner') @@ -1224,7 +1220,7 @@ def f(): expected 
= ( DataFrame(dict( household_id=[1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4], - asset_id=["nl0000301109", "nl0000289783", "gb00b03mlx29", + asset_id=["nl0000301109", "nl0000301109", "gb00b03mlx29", "gb00b03mlx29", "gb00b03mlx29", "gb00b03mlx29", "gb00b03mlx29", "gb00b03mlx29", "lu0197800237", "lu0197800237", @@ -1237,38 +1233,51 @@ def f(): .09604978, -.06524096, .03532373, .03025441, .036997, None, None] )) - .set_index(["household_id", "asset_id", "t"])) + .set_index(["household_id", "asset_id", "t"]) + .reindex(columns=['share', 'log_return'])) - def f(): - household.join(log_return, how='outer') - self.assertRaises(TypeError, f) + result = (merge(household.reset_index(), log_return.reset_index(), + on=['asset_id'], how='outer') + .set_index(['household_id', 'asset_id', 't'])) + + assert_frame_equal(result, expected) def test_join_multi_levels3(self): + matrix = ( pd.DataFrame( dict(Origin=[1, 1, 2, 2, 3], Destination=[1, 2, 1, 3, 1], - Trips=[1987, 3647, 2470, 7521, 4296]), - columns=['Origin', 'Destination', 'Trips']) - .set_index(['Origin', 'Destination'])) - + Period=['AM','PM','IP','AM','OP'], + TripPurp=['hbw', 'nhb', 'hbo', 'nhb', 'hbw'], + Trips=[1987, 3647, 2470, 4296, 4444]), + columns=['Origin', 'Destination', 'Period', + 'TripPurp', 'Trips']) + .set_index(['Origin', 'Destination', 'Period', 'TripPurp'])) + distances = ( pd.DataFrame( - dict(Origin=[1, 1, 2, 2, 3, 3], - Destination=[2, 3, 1, 3, 1, 2], - Distance=[100, 80, 90, 80, 70, 70]), - columns=['Origin', 'Destination', 'Distance']) - .set_index(['Origin', 'Destination'])) - - result = matrix.join(distances, how='left') - + dict(Origin= [1, 1, 2, 2, 3, 3, 5], + Destination=[1, 2, 1, 2, 1, 2, 6], + Period=['AM','PM','IP','AM','OP','IP', 'AM'], + LinkType=['a', 'a', 'c', 'b', 'a', 'b', 'a'], + Distance=[100, 80, 90, 80, 75, 35, 55]), + columns=['Origin', 'Destination', 'Period', + 'LinkType', 'Distance']) + .set_index(['Origin', 'Destination','Period', 'LinkType'])) + expected = ( pd.DataFrame( - 
dict(Origin=[1, 1, 2, 2, 3], + dict(Origin= [1, 1, 2, 2, 3], Destination=[1, 2, 1, 3, 1], - Trips=[1987, 3647, 2470, 7521, 4296], - Distance=[np.nan, 100, 90, 80, 70]), - columns=['Origin', 'Destination', 'Trips', 'Distance']) - .set_index(['Origin', 'Destination'])) - + Period=['AM','PM','IP', 'AM', 'OP'], + TripPurp=['hbw', 'nhb', 'hbo', 'nhb', 'hbw'], + Trips=[1987, 3647, 2470, 4296, 4444], + Distance=[100, 80, 90, np.nan, 75]), + columns=['Origin', 'Destination', 'Period', 'TripPurp', + 'Trips', 'Distance']) + .set_index(['Origin', 'Destination', 'Period', 'TripPurp'])) + + + result = matrix.join(distances, how='left') assert_frame_equal(result, expected)