46
46
@Appender(_merge_doc, indents=0)
def merge(left, right, how='inner', on=None, left_on=None, right_on=None,
          left_index=False, right_index=False, sort=False,
          suffixes=('_x', '_y'), copy=True, indicator=False,
          validate=None):
    # The docstring is attached by the @Appender decorator above, so only
    # comments are used here.
    #
    # Every option is forwarded unchanged to _MergeOperation; the merged
    # object is whatever its get_result() returns.
    options = dict(how=how, on=on, left_on=left_on, right_on=right_on,
                   left_index=left_index, right_index=right_index,
                   sort=sort, suffixes=suffixes, copy=copy,
                   indicator=indicator, validate=validate)
    return _MergeOperation(left, right, **options).get_result()
55
57
56
58
@@ -498,7 +500,8 @@ class _MergeOperation(object):
498
500
def __init__ (self , left , right , how = 'inner' , on = None ,
499
501
left_on = None , right_on = None , axis = 1 ,
500
502
left_index = False , right_index = False , sort = True ,
501
- suffixes = ('_x' , '_y' ), copy = True , indicator = False ):
503
+ suffixes = ('_x' , '_y' ), copy = True , indicator = False ,
504
+ validate = None ):
502
505
self .left = self .orig_left = left
503
506
self .right = self .orig_right = right
504
507
self .how = how
@@ -516,6 +519,7 @@ def __init__(self, left, right, how='inner', on=None,
516
519
self .right_index = right_index
517
520
518
521
self .indicator = indicator
522
+ self .validate = validate
519
523
520
524
if isinstance (self .indicator , compat .string_types ):
521
525
self .indicator_name = self .indicator
@@ -561,6 +565,9 @@ def __init__(self, left, right, how='inner', on=None,
561
565
# to avoid incompat dtypes
562
566
self ._maybe_coerce_merge_keys ()
563
567
568
+ if self .validate is not None :
569
+ self ._validate ()
570
+
564
571
def get_result (self ):
565
572
if self .indicator :
566
573
self .left , self .right = self ._indicator_pre_merge (
@@ -952,6 +959,47 @@ def _validate_specification(self):
952
959
if len (self .right_on ) != len (self .left_on ):
953
960
raise ValueError ("len(right_on) must equal len(left_on)" )
954
961
962
+ def _validate (self ):
963
+
964
+ # Get merging series:
965
+ left_key = self .left_on if self .left_on is not None else self .on
966
+ right_key = self .right_on if self .right_on is not None else self .on
967
+
968
+ if self .left_index :
969
+ left_unique = not (self .orig_left .index .duplicated ()).any ()
970
+ else :
971
+ left_unique = not (self .orig_left [left_key ].duplicated ()).any ()
972
+
973
+ if self .right_index :
974
+ right_unique = not (self .orig_right .index .duplicated ()).any ()
975
+ else :
976
+ right_unique = not (self .orig_right [right_key ].duplicated ()).any ()
977
+
978
+ # Check valid arg
979
+ if self .validate not in ['one_to_one' , '1:1' ,
980
+ 'one_to_many' , '1:m' ,
981
+ 'many_to_one' , 'm:1' ,
982
+ 'many_to_many' , 'm:m' ]:
983
+
984
+ raise ValueError ("Not a valid argument for validate" )
985
+
986
+ # Check data integrity
987
+ if self .validate in ["one_to_one" , "1:1" ]:
988
+ if not left_unique or not right_unique :
989
+ raise ValueError ("Merge keys are not unique in either left or right dataset; not a one-to-one merge" )
990
+ if not left_unique :
991
+ raise ValueError ("Merge keys are not unique in left dataset; not a one-to-one merge" )
992
+ if not right_unique :
993
+ raise ValueError ("Merge keys are not unique in right dataset; not a one-to-one merge" )
994
+
995
+ if self .validate in ["one_to_many" , "1:m" ]:
996
+ if not left_unique :
997
+ raise ValueError ("Merge keys are not unique in left dataset; not a one-to-many merge" )
998
+
999
+ if self .validate in ["many_to_one" , "m:1" ]:
1000
+ if not right_unique :
1001
+ raise ValueError ("Merge keys are not unique in right dataset; not a many-to-one merge" )
1002
+
955
1003
956
1004
def _get_join_indexers (left_keys , right_keys , sort = False , how = 'inner' ,
957
1005
** kwargs ):
0 commit comments