Skip to content

Commit 9c24834

Browse files
author
Nick Eubank
committed
revisions for comments, remove from merge_asof
1 parent 496e915 commit 9c24834

File tree

4 files changed

+68
-86
lines changed

4 files changed

+68
-86
lines changed

doc/source/merging.rst

+43-17
Original file line numberDiff line numberDiff line change
@@ -552,14 +552,17 @@ standard database join operations between DataFrame objects:
552552

553553
.. versionadded:: 0.17.0
554554

555-
- ``validate`` : {None, '1:1', '1:m', 'm:1', 'm:m', "one_to_one", "one_to_many", "many_to_one", "many_to_many"}, default None
555+
- ``validate`` : String, default None
556556
If specified, checks if merge is of specified type.
557-
* "one_to_one" or "1:1": check if merge keys are unique in both
558-
left and right dataset.
559-
* "one_to_many" or "1:m": check if merge keys are unique in left
560-
dataset.
561-
* "many_to_one" or "m:1": check if merge keys are unique in right
562-
dataset.
557+
558+
* "one_to_one" or "1:1": checks if merge keys are unique in both
559+
left and right datasets.
560+
* "one_to_many" or "1:m": checks if merge keys are unique in left
561+
dataset.
562+
* "many_to_one" or "m:1": checks if merge keys are unique in right
563+
dataset.
564+
* "many_to_many" or "m:m": allowed, but does not result in checks.
565+
563566

564567
.. versionadded:: 0.21.0
565568

@@ -740,22 +743,45 @@ Users can use the ``validate`` argument to automatically check whether there are
740743

741744
In the following example, there are duplicate values of ``B`` in the right DataFrame. Because the ``validate`` argument requests a one-to-one merge and the merge keys in the right DataFrame are not unique, an exception will be raised.
742745

743-
.. code-block:: python
746+
747+
.. ipython:: python
744748
745749
left = pd.DataFrame({'A' : [1,2], 'B' : [1, 2]})
746750
right = pd.DataFrame({'A' : [4,5,6], 'B': [2, 2, 2]})
747-
result = pd.merge(left, right, on='B', how='outer', validate="one_to_one");
748-
749-
ValueError: Merge keys are not unique in either left or right dataset; not a one-to-one merge
750-
751751
752-
If the user is aware of the duplicates in the right `DataFrame` but wants to ensure there are no duplicates in the left DataFrame, one can use the `one_to_many` argument instead, which will not raise an exception.
752+
.. code-block:: python
753753
754-
.. ipython:: python
755-
:suppress:
754+
result = pd.merge(left, right, on='B', how='outer', validate="one_to_one")
755+
756+
---------------------------------------------------------------------------
757+
ValueError Traceback (most recent call last)
758+
<ipython-input-3-29316387da90> in <module>()
759+
----> 1 result = pd.merge(left, right, on='B', how='outer', validate="one_to_one")
760+
761+
/Users/Nick/github/pandas/pandas/core/reshape/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
762+
53 right_index=right_index, sort=sort, suffixes=suffixes,
763+
54 copy=copy, indicator=indicator,
764+
---> 55 validate=validate)
765+
56 return op.get_result()
766+
57
767+
768+
/Users/Nick/github/pandas/pandas/core/reshape/merge.py in __init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator, validate)
769+
570 # are in fact unique.
770+
571 if validate is not None:
771+
--> 572 self._validate(validate)
772+
573
773+
574 def get_result(self):
774+
775+
/Users/Nick/github/pandas/pandas/core/reshape/merge.py in _validate(self, validate)
776+
987 " not a one-to-one merge")
777+
988 elif not right_unique:
778+
--> 989 raise ValueError("Merge keys are not unique in right dataset;"
779+
990 " not a one-to-one merge")
780+
991
781+
782+
ValueError: Merge keys are not unique in right dataset; not a one-to-one merge
756783
757-
left = pd.DataFrame({'A' : [1,2], 'B' : [1, 2]})
758-
right = pd.DataFrame({'A' : [4,5,6], 'B': [2, 2, 2]})
784+
If the user is aware of the duplicates in the right ``DataFrame`` but wants to ensure there are no duplicates in the left ``DataFrame``, one can use the ``validate='one_to_many'`` argument instead, which will not raise an exception.
759785

760786
.. ipython:: python
761787

pandas/core/frame.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -174,15 +174,16 @@
174174
175175
.. versionadded:: 0.17.0
176176
177-
validate : {None, '1:1', '1:m', 'm:1', 'm:m', "one_to_one", "one_to_many",
178-
"many_to_one", "many_to_many"}, default None
177+
validate : String, default None
179178
If specified, checks if merge is of specified type.
179+
180180
* "one_to_one" or "1:1": check if merge keys are unique in both
181-
left and right dataset.
181+
left and right datasets.
182182
* "one_to_many" or "1:m": check if merge keys are unique in left
183-
dataset.
183+
dataset.
184184
* "many_to_one" or "m:1": check if merge keys are unique in right
185-
dataset.
185+
dataset.
186+
* "many_to_many" or "m:m": allowed, but does not result in checks.
186187
187188
.. versionadded:: 0.21.0
188189

pandas/core/reshape/merge.py

+19-38
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,7 @@ def merge_asof(left, right, on=None,
265265
suffixes=('_x', '_y'),
266266
tolerance=None,
267267
allow_exact_matches=True,
268-
direction='backward',
269-
validate=None):
268+
direction='backward'):
270269
"""Perform an asof merge. This is similar to a left-join except that we
271270
match on nearest key rather than equal keys.
272271
@@ -344,18 +343,6 @@ def merge_asof(left, right, on=None,
344343
345344
.. versionadded:: 0.20.0
346345
347-
validate : {None, '1:1', '1:m', 'm:1', 'm:m', "one_to_one", "one_to_many",
348-
"many_to_one", "many_to_many"}, default None
349-
If specified, checks if merge is of specified type.
350-
* "one_to_one" or "1:1": check if merge keys are unique in both
351-
left and right dataset.
352-
* "one_to_many" or "1:m": check if merge keys are unique in left
353-
dataset.
354-
* "many_to_one" or "m:1": check if merge keys are unique in right
355-
dataset.
356-
357-
.. versionadded:: 0.21.0
358-
359346
360347
Returns
361348
-------
@@ -498,7 +485,7 @@ def merge_asof(left, right, on=None,
498485
suffixes=suffixes,
499486
how='asof', tolerance=tolerance,
500487
allow_exact_matches=allow_exact_matches,
501-
direction=direction, validate=validate)
488+
direction=direction)
502489
return op.get_result()
503490

504491

@@ -979,47 +966,45 @@ def _validate(self, validate):
979966

980967
# Check uniqueness of each
981968
if self.left_index:
982-
left_unique = not (self.orig_left.index.duplicated()).any()
969+
left_unique = not self.orig_left.index.is_unique
983970
else:
984971
left_unique = MultiIndex.from_arrays(self.left_join_keys
985972
).is_unique
986973

987974
if self.right_index:
988-
right_unique = not (self.orig_right.index.duplicated()).any()
975+
right_unique = not self.orig_right.index.is_unique
989976
else:
990977
right_unique = MultiIndex.from_arrays(self.right_join_keys
991978
).is_unique
992979

993-
# Check valid arg
994-
if validate not in ['one_to_one', '1:1',
995-
'one_to_many', '1:m',
996-
'many_to_one', 'm:1',
997-
'many_to_many', 'm:m']:
998-
999-
raise ValueError("Not a valid argument for validate")
1000-
1001980
# Check data integrity
1002981
if validate in ["one_to_one", "1:1"]:
1003-
if not left_unique or not right_unique:
982+
if not left_unique and not right_unique:
1004983
raise ValueError("Merge keys are not unique in either left"
1005984
" or right dataset; not a one-to-one merge")
1006-
if not left_unique:
985+
elif not left_unique:
1007986
raise ValueError("Merge keys are not unique in left dataset;"
1008987
" not a one-to-one merge")
1009-
if not right_unique:
988+
elif not right_unique:
1010989
raise ValueError("Merge keys are not unique in right dataset;"
1011990
" not a one-to-one merge")
1012991

1013-
if validate in ["one_to_many", "1:m"]:
992+
elif validate in ["one_to_many", "1:m"]:
1014993
if not left_unique:
1015994
raise ValueError("Merge keys are not unique in left dataset;"
1016995
"not a one-to-many merge")
1017996

1018-
if validate in ["many_to_one", "m:1"]:
997+
elif validate in ["many_to_one", "m:1"]:
1019998
if not right_unique:
1020999
raise ValueError("Merge keys are not unique in right dataset;"
10211000
" not a many-to-one merge")
10221001

1002+
elif validate in ['many_to_many', 'm:m']:
1003+
pass
1004+
1005+
else:
1006+
raise ValueError("Not a valid argument for validate")
1007+
10231008

10241009
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner',
10251010
**kwargs):
@@ -1072,19 +1057,16 @@ class _OrderedMerge(_MergeOperation):
10721057
def __init__(self, left, right, on=None, left_on=None, right_on=None,
10731058
left_index=False, right_index=False, axis=1,
10741059
suffixes=('_x', '_y'), copy=True,
1075-
fill_method=None, how='outer',
1076-
validate=None):
1060+
fill_method=None, how='outer'):
10771061

10781062
self.fill_method = fill_method
10791063
_MergeOperation.__init__(self, left, right, on=on, left_on=left_on,
10801064
left_index=left_index,
10811065
right_index=right_index,
10821066
right_on=right_on, axis=axis,
10831067
how=how, suffixes=suffixes,
1084-
sort=True, # factorize sorts
1085-
validate=validate
1068+
sort=True # factorize sorts
10861069
)
1087-
10881070
def get_result(self):
10891071
join_index, left_indexer, right_indexer = self._get_join_info()
10901072

@@ -1179,7 +1161,7 @@ def __init__(self, left, right, on=None, left_on=None, right_on=None,
11791161
fill_method=None,
11801162
how='asof', tolerance=None,
11811163
allow_exact_matches=True,
1182-
direction='backward', validate=None):
1164+
direction='backward'):
11831165

11841166
self.by = by
11851167
self.left_by = left_by
@@ -1192,8 +1174,7 @@ def __init__(self, left, right, on=None, left_on=None, right_on=None,
11921174
right_on=right_on, left_index=left_index,
11931175
right_index=right_index, axis=axis,
11941176
how=how, suffixes=suffixes,
1195-
fill_method=fill_method,
1196-
validate=validate)
1177+
fill_method=fill_method)
11971178

11981179
def _validate_specification(self):
11991180
super(_AsOfMerge, self)._validate_specification()

pandas/tests/reshape/test_merge_asof.py

-26
Original file line numberDiff line numberDiff line change
@@ -973,29 +973,3 @@ def test_on_float_by_int(self):
973973
columns=['symbol', 'exch', 'price', 'mpv'])
974974

975975
assert_frame_equal(result, expected)
976-
977-
def test_validate(self):
978-
979-
left = pd.DataFrame({'a': [1, 5, 10],
980-
'left_val': ['a', 'b', 'c']})
981-
right = pd.DataFrame({'a': [1, 2, 3, 6, 7],
982-
'right_val': [1, 2, 3, 6, 7]})
983-
# Simple run 1:1
984-
pd.merge_asof(left, right, on='a', validate="1:1")
985-
986-
# Dups on right
987-
right_w_dups = right.append(pd.DataFrame({'a': [7],
988-
'right_val': [-2]}))
989-
right_w_dups = right_w_dups.sort_values('a')
990-
991-
pd.merge_asof(left, right_w_dups, on='a', validate="1:m")
992-
with pytest.raises(ValueError):
993-
pd.merge_asof(left, right_w_dups, on='a', validate="1:1")
994-
995-
# Dups on left
996-
left_w_dups = left.append(pd.DataFrame({'a': [1],
997-
'left_val': [-2]}))
998-
left_w_dups = left_w_dups.sort_values('a')
999-
pd.merge_asof(left_w_dups, right, on='a', validate="m:1")
1000-
with pytest.raises(ValueError):
1001-
pd.merge_asof(left_w_dups, right_w_dups, on='a', validate="1:1")

0 commit comments

Comments
 (0)