46
46
@Appender(_merge_doc, indents=0)
def merge(left, right, how='inner', on=None, left_on=None, right_on=None,
          left_index=False, right_index=False, sort=False,
          suffixes=('_x', '_y'), copy=True, indicator=False,
          validate=None):
    # NOTE(review): no literal docstring is written here on purpose -- the
    # ``Appender(_merge_doc)`` decorator presumably supplies the user-facing
    # documentation, and a docstring would change the resulting ``__doc__``.

    # Gather every option once and forward it unchanged; all of the actual
    # work is delegated to _MergeOperation.
    merge_options = dict(
        how=how,
        on=on,
        left_on=left_on,
        right_on=right_on,
        left_index=left_index,
        right_index=right_index,
        sort=sort,
        suffixes=suffixes,
        copy=copy,
        indicator=indicator,
        validate=validate,
    )
    return _MergeOperation(left, right, **merge_options).get_result()
55
57
56
58
@@ -498,7 +500,8 @@ class _MergeOperation(object):
498
500
def __init__ (self , left , right , how = 'inner' , on = None ,
499
501
left_on = None , right_on = None , axis = 1 ,
500
502
left_index = False , right_index = False , sort = True ,
501
- suffixes = ('_x' , '_y' ), copy = True , indicator = False ):
503
+ suffixes = ('_x' , '_y' ), copy = True , indicator = False ,
504
+ validate = None ):
502
505
self .left = self .orig_left = left
503
506
self .right = self .orig_right = right
504
507
self .how = how
@@ -516,6 +519,7 @@ def __init__(self, left, right, how='inner', on=None,
516
519
self .right_index = right_index
517
520
518
521
self .indicator = indicator
522
+ self .validate = validate
519
523
520
524
if isinstance (self .indicator , compat .string_types ):
521
525
self .indicator_name = self .indicator
@@ -561,6 +565,9 @@ def __init__(self, left, right, how='inner', on=None,
561
565
# to avoid incompat dtypes
562
566
self ._maybe_coerce_merge_keys ()
563
567
568
+ if self .validate is not None :
569
+ self ._validate ()
570
+
564
571
def get_result (self ):
565
572
if self .indicator :
566
573
self .left , self .right = self ._indicator_pre_merge (
@@ -952,6 +959,51 @@ def _validate_specification(self):
952
959
if len (self .right_on ) != len (self .left_on ):
953
960
raise ValueError ("len(right_on) must equal len(left_on)" )
954
961
962
def _validate(self):
    """Check that the merge keys satisfy the requested ``validate`` mode.

    Reads ``self.validate`` and, depending on the mode, verifies that the
    merge keys of ``self.orig_left`` and/or ``self.orig_right`` contain no
    duplicates.  The keys of a side are its index when ``self.left_index``
    / ``self.right_index`` is set; otherwise they are the columns named by
    ``self.left_on`` / ``self.right_on``, falling back to ``self.on``.

    Accepted modes (long and short spelling):

    * ``'one_to_one'`` / ``'1:1'``   -- keys must be unique on both sides
    * ``'one_to_many'`` / ``'1:m'``  -- keys must be unique on the left
    * ``'many_to_one'`` / ``'m:1'``  -- keys must be unique on the right
    * ``'many_to_many'`` / ``'m:m'`` -- accepted; nothing is checked

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``self.validate`` is not one of the accepted modes, or if the
        merge keys violate the uniqueness the mode requires.
    """
    one_to_one = ('one_to_one', '1:1')
    one_to_many = ('one_to_many', '1:m')
    many_to_one = ('many_to_one', 'm:1')
    many_to_many = ('many_to_many', 'm:m')

    mode = self.validate

    # Reject a bad argument first, before any (potentially expensive)
    # duplicate scan over the data.
    if mode not in one_to_one + one_to_many + many_to_one + many_to_many:
        raise ValueError("Not a valid argument for validate")

    # Only scan the side(s) whose uniqueness the mode actually constrains.
    check_left = mode in one_to_one or mode in one_to_many
    check_right = mode in one_to_one or mode in many_to_one

    left_unique = True
    if check_left:
        if self.left_index:
            left_unique = not self.orig_left.index.duplicated().any()
        else:
            left_key = self.left_on if self.left_on is not None else self.on
            left_unique = not self.orig_left[left_key].duplicated().any()

    right_unique = True
    if check_right:
        if self.right_index:
            right_unique = not self.orig_right.index.duplicated().any()
        else:
            right_key = (self.right_on if self.right_on is not None
                         else self.on)
            right_unique = not self.orig_right[right_key].duplicated().any()

    if mode in one_to_one:
        # "and", not "or": this message is for the case where BOTH sides
        # are at fault; a single offending side gets its own message below.
        if not left_unique and not right_unique:
            raise ValueError("Merge keys are not unique in either left"
                             " or right dataset; not a one-to-one merge")
        if not left_unique:
            raise ValueError("Merge keys are not unique in left dataset;"
                             " not a one-to-one merge")
        if not right_unique:
            raise ValueError("Merge keys are not unique in right dataset;"
                             " not a one-to-one merge")
    elif mode in one_to_many:
        if not left_unique:
            raise ValueError("Merge keys are not unique in left dataset;"
                             " not a one-to-many merge")
    elif mode in many_to_one:
        if not right_unique:
            raise ValueError("Merge keys are not unique in right dataset;"
                             " not a many-to-one merge")
1006
+
955
1007
956
1008
def _get_join_indexers (left_keys , right_keys , sort = False , how = 'inner' ,
957
1009
** kwargs ):
0 commit comments