From 91a3f1663437293eb1ede6e8784bfd7c95fb9b9a Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Sun, 27 May 2018 20:25:41 +0530 Subject: [PATCH 01/17] Initial commit GH21220 --- pandas/core/reshape/merge.py | 20 +++++++++++++------- pandas/tests/reshape/merge/test_merge.py | 21 +++++++++++++++++++++ 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 4d8897fb7c811..2ef97f49f4cdc 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -11,7 +11,7 @@ import pandas.compat as compat from pandas import (Categorical, DataFrame, - Index, MultiIndex, Timedelta) + Index, MultiIndex, Timedelta, Series) from pandas.core.arrays.categorical import _recode_for_categories from pandas.core.frame import _merge_doc from pandas.core.dtypes.common import ( @@ -492,6 +492,10 @@ def __init__(self, left, right, how='inner', on=None, left_index=False, right_index=False, sort=True, suffixes=('_x', '_y'), copy=True, indicator=False, validate=None): + if isinstance(left, Series): + left = left.to_frame() + if isinstance(right, Series): + right = right.to_frame() self.left = self.orig_left = left self.right = self.orig_right = right self.how = how @@ -535,12 +539,14 @@ def __init__(self, left, right, how='inner', on=None, '{right_index}'.format(right_index=type(right_index))) # warn user when merging between different levels - if left.columns.nlevels != right.columns.nlevels: - msg = ('merging between different levels can give an unintended ' - 'result ({left} levels on the left, {right} on the right)' - ).format(left=left.columns.nlevels, - right=right.columns.nlevels) - warnings.warn(msg, UserWarning) + if isinstance(left, DataFrame) and isinstance(right, DataFrame): + if left.columns.nlevels != right.columns.nlevels: + msg = ('merging between different levels can give an ' + 'unintended result ({left} levels on the left, ' + '{right} on the right)' + ).format(left=left.columns.nlevels, + right=right.columns.nlevels) + warnings.warn(msg, UserWarning) self._validate_specification() diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 8e639edd34b18..42a39b282f53f 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1864,3 +1864,24 @@ def test_merge_index_types(index): OrderedDict([('left_data', [1, 2]), ('right_data', [1.0, 2.0])]), index=index) assert_frame_equal(result, expected) + + +def test_merge_series(): + # GH 21220 + a = pd.DataFrame({"A": [1, 2, 3, 4]}, + index=pd.MultiIndex.from_product([['a', 'b'], [0, 1]], + names=['outer', 'inner'])) + b = pd.Series([1, 2, 3, 4], + index=pd.MultiIndex.from_product([['a', 'b'], [1, 2]], + names=['outer', 'inner']), name='B') + expected = pd.DataFrame({"A": [2, 4], "B": [1, 3]}, + index=pd.MultiIndex.from_product([['a', 'b'], [1]], + names=['outer', 'inner'])) + + # Testing current merge behvaior is as before + result = pd.merge(a, b.to_frame(), on=['outer', 'inner']) + tm.assert_frame_equal(result, expected) + + # Testing changed merge behvaior is as expected + result = pd.merge(a, b, on=['outer', 'inner']) + tm.assert_frame_equal(result, expected) From 64e6e66d0d28e6132c64b917fc95a5b271d46391 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Sun, 27 May 2018 20:36:22 +0530 Subject: [PATCH 02/17] Removed some now unrequired edits left GH21220 --- pandas/core/reshape/merge.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 2ef97f49f4cdc..ed1e184d9915a 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -539,14 +539,12 @@ def __init__(self, left, right, how='inner', on=None, '{right_index}'.format(right_index=type(right_index))) # warn user when merging between different levels - if isinstance(left, DataFrame) and isinstance(right, DataFrame): - if left.columns.nlevels != right.columns.nlevels: - msg = ('merging between different levels can give an ' - 'unintended result ({left} levels on the left, ' - '{right} on the right)' - ).format(left=left.columns.nlevels, - right=right.columns.nlevels) - warnings.warn(msg, UserWarning) + if left.columns.nlevels != right.columns.nlevels: + msg = ('merging between different levels can give an unintended ' + 'result ({left} levels on the left, {right} on the right)' + ).format(left=left.columns.nlevels, + right=right.columns.nlevels) + warnings.warn(msg, UserWarning) self._validate_specification() From 8939fad829fc692bad978a57ad70dbe8016c04b4 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Mon, 28 May 2018 07:50:48 +0530 Subject: [PATCH 03/17] Edits to test_join GH21220 --- pandas/tests/reshape/merge/test_join.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 1b8f3632d381c..52f5dcb8d5db2 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -230,7 +230,10 @@ def test_join_on_fails_with_different_column_counts(self): def test_join_on_fails_with_wrong_object_type(self): # GH12081 - wrongly_typed = [Series([0, 1]), 2, 'str', None, np.array([0, 1])] + """ GH 21220 - merging of Series and DataFrame is now allowed + Edited the test to remove the Series object from 'wrongly_typed' + """ + wrongly_typed = [2, 'str', None, np.array([0, 1])] df = DataFrame({'a': [1, 1]}) for obj in wrongly_typed: From b09ffb7d779321102fb2f94d0c046dd70bec1c41 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Mon, 28 May 2018 18:35:09 +0530 Subject: [PATCH 04/17] Minor comment edit to force Travis CI rerun GH21220 --- pandas/tests/reshape/merge/test_join.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 52f5dcb8d5db2..b2ba93aa4813f 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -230,7 +230,7 @@ def test_join_on_fails_with_different_column_counts(self): def test_join_on_fails_with_wrong_object_type(self): # GH12081 - """ GH 21220 - merging of Series and DataFrame is now allowed + """ GH21220 - merging of Series and DataFrame is now allowed Edited the test to remove the Series object from 'wrongly_typed' """ wrongly_typed = [2, 'str', None, np.array([0, 1])] From 1a0e5a2f5c95c5af7966f4fc8f7b3bdaeb357213 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Mon, 28 May 2018 21:59:21 +0530 Subject: [PATCH 05/17] Updated comments in test GH21220 --- pandas/tests/reshape/merge/test_merge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 42a39b282f53f..2040934b5cc2c 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1878,10 +1878,10 @@ def test_merge_series(): index=pd.MultiIndex.from_product([['a', 'b'], [1]], names=['outer', 'inner'])) - # Testing current merge behvaior is as before + # Test merge with a DataFrame and a Series 'converted-to-DataFrame' object result = pd.merge(a, b.to_frame(), on=['outer', 'inner']) tm.assert_frame_equal(result, expected) - # Testing changed merge behvaior is as expected + # Test merge with a DataFrame and a Series object result = pd.merge(a, b, on=['outer', 'inner']) tm.assert_frame_equal(result, expected) From 59b487dc13d54bb96f9904a45a659a1935e44824 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Thu, 31 May 2018 22:57:22 +0530 Subject: [PATCH 06/17] Validate operand, parametrized tests, whatnew GH21220 --- doc/source/whatsnew/v0.23.1.txt | 1 + pandas/core/reshape/merge.py | 26 ++++++++------- pandas/tests/reshape/merge/test_merge.py | 40 +++++++++++++++++------- 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 4876678baaa6e..15871163d2b34 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -16,6 +16,7 @@ New features ~~~~~~~~~~~~ - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with MultiIndex (:issue:`21115`) +- :func: merge now allows a ``DataFrame`` and a ``Series`` with a name as inputs (:issue:`21220`) .. _whatsnew_0231.deprecations: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index ed1e184d9915a..3ea4b02ed3359 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -492,10 +492,8 @@ def __init__(self, left, right, how='inner', on=None, left_index=False, right_index=False, sort=True, suffixes=('_x', '_y'), copy=True, indicator=False, validate=None): - if isinstance(left, Series): - left = left.to_frame() - if isinstance(right, Series): - right = right.to_frame() + left = validate_operand(left) + right = validate_operand(right) self.left = self.orig_left = left self.right = self.orig_right = right self.how = how @@ -522,13 +520,6 @@ def __init__(self, left, right, how='inner', on=None, raise ValueError( 'indicator option can only accept boolean or string arguments') - if not isinstance(left, DataFrame): - raise ValueError('can not merge DataFrame with instance of ' - 'type {left}'.format(left=type(left))) - if not isinstance(right, DataFrame): - raise ValueError('can not merge DataFrame with instance of ' - 'type {right}'.format(right=type(right))) - if not is_bool(left_index): raise ValueError( 'left_index parameter must be of type bool, not ' @@ -1643,3 +1634,16 @@ def _should_fill(lname, rname): def _any(x): return x is not None and com._any_not_none(*x) + + +def validate_operand(obj): + if isinstance(obj, DataFrame): + return obj + elif isinstance(obj, Series): + if obj.name is None: + raise ValueError('Cannot merge a Series without a name') + else: + return obj.to_frame() + else: + raise ValueError('Cannot merge a DataFrame or a Series with ' + 'instance of type {obj}'.format(obj=type(obj))) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 2040934b5cc2c..29630ca827ee3 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1866,22 +1866,40 @@ def test_merge_index_types(index): assert_frame_equal(result, expected) -def test_merge_series(): +@pytest.mark.parametrize("on,left_on,right_on,left_index,right_index,nms,nm", [ + (['outer', 'inner'], None, None, False, False, + ['outer', 'inner'], 'B'), + (None, None, None, True, True, ['outer', 'inner'], + 'B'), + (None, ['outer', 'inner'], None, False, True, None, + 'B'), + (None, None, ['outer', 'inner'], True, False, None, + 'B'), + (['outer', 'inner'], None, None, False, False, + ['outer', 'inner'], None), + (None, None, None, True, True, ['outer', 'inner'], + None), + (None, ['outer', 'inner'], None, False, True, None, + None), + (None, None, ['outer', 'inner'], True, False, None, + None), + ]) +def test_merge_series(on, left_on, right_on, left_index, right_index, nms, nm): # GH 21220 a = pd.DataFrame({"A": [1, 2, 3, 4]}, index=pd.MultiIndex.from_product([['a', 'b'], [0, 1]], names=['outer', 'inner'])) b = pd.Series([1, 2, 3, 4], index=pd.MultiIndex.from_product([['a', 'b'], [1, 2]], - names=['outer', 'inner']), name='B') + names=['outer', 'inner']), name=nm) expected = pd.DataFrame({"A": [2, 4], "B": [1, 3]}, index=pd.MultiIndex.from_product([['a', 'b'], [1]], - names=['outer', 'inner'])) - - # Test merge with a DataFrame and a Series 'converted-to-DataFrame' object - result = pd.merge(a, b.to_frame(), on=['outer', 'inner']) - tm.assert_frame_equal(result, expected) - - # Test merge with a DataFrame and a Series object - result = pd.merge(a, b, on=['outer', 'inner']) - tm.assert_frame_equal(result, expected) + names=nms)) + if nm is not None: + result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on, + left_index=left_index, right_index=right_index) + tm.assert_frame_equal(result, expected) + else: + with tm.assert_raises_regex(ValueError, 'a Series without a name'): + result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on, + left_index=left_index, right_index=right_index) From 0d14cc26e570208e70d34f77bfd4cea3b5f1337d Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Thu, 7 Jun 2018 21:57:53 +0530 Subject: [PATCH 07/17] Moved whatsnew to 0.24.0 GH21220 --- doc/source/whatsnew/v0.23.1.txt | 2 +- doc/source/whatsnew/v0.24.0.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 15871163d2b34..9a1ee2b8a77de 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -16,7 +16,7 @@ New features ~~~~~~~~~~~~ - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with MultiIndex (:issue:`21115`) -- :func: merge now allows a ``DataFrame`` and a ``Series`` with a name as inputs (:issue:`21220`) + .. _whatsnew_0231.deprecations: diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 43e513c9d03f5..6a48e3d728c83 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -7,6 +7,7 @@ v0.24.0 New features ~~~~~~~~~~~~ +- :func: merge now allows a ``DataFrame`` and a ``Series`` with a name as inputs (:issue:`21220`) .. _whatsnew_0240.enhancements.other: From 12da2ce64e1c81f257f978996fe2765198298f18 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Thu, 7 Jun 2018 21:59:14 +0530 Subject: [PATCH 08/17] Moved whatsnew to 0.24.0 GH21220 --- doc/source/whatsnew/v0.23.1.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 9a1ee2b8a77de..4876678baaa6e 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -18,7 +18,6 @@ New features - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with MultiIndex (:issue:`21115`) - .. _whatsnew_0231.deprecations: Deprecations From 2d689ad8c643df95a3e0479d2da70ce351fb1e6c Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Fri, 15 Jun 2018 14:36:13 +0530 Subject: [PATCH 09/17] Updated comment in test_join GH21220 --- pandas/tests/reshape/merge/test_join.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index b2ba93aa4813f..bf1ae23ea3405 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -230,10 +230,9 @@ def test_join_on_fails_with_different_column_counts(self): def test_join_on_fails_with_wrong_object_type(self): # GH12081 - """ GH21220 - merging of Series and DataFrame is now allowed - Edited the test to remove the Series object from 'wrongly_typed' - """ wrongly_typed = [2, 'str', None, np.array([0, 1])] + # GH21220 - merging of Series and DataFrame is now allowed + # Edited the test to remove the Series object from 'wrongly_typed' df = DataFrame({'a': [1, 1]}) for obj in wrongly_typed: From 687568b20b0a7baeaad1b1e87cc64ae0fbc07a0b Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Tue, 19 Jun 2018 20:39:39 +0530 Subject: [PATCH 10/17] Linting, parameterizing test, whatsnew, raising GH21220 --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/reshape/merge.py | 4 ++-- pandas/tests/reshape/merge/test_join.py | 21 ++++++++++---------- pandas/tests/reshape/merge/test_merge.py | 25 ++++++++---------------- 4 files changed, 22 insertions(+), 30 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 6a48e3d728c83..cde6072dab0d4 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -7,7 +7,7 @@ v0.24.0 New features ~~~~~~~~~~~~ -- :func: merge now allows a ``DataFrame`` and a ``Series`` with a name as inputs (:issue:`21220`) +- :func:`merge` now directly allows merge between objects of type ``DataFrame`` and ``Series`` with a name, without need to convert the ``Series`` object into a ``DataFrame`` beforehand (:issue:`21220`) .. _whatsnew_0240.enhancements.other: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 3ea4b02ed3359..45b993983a9e0 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1645,5 +1645,5 @@ def validate_operand(obj): else: return obj.to_frame() else: - raise ValueError('Cannot merge a DataFrame or a Series with ' - 'instance of type {obj}'.format(obj=type(obj))) + raise TypeError('Can only merge Series or DataFrame objects, ' + 'a {obj} was passed'.format(obj=type(obj))) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index bf1ae23ea3405..d2be67b841f08 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -228,18 +228,19 @@ def test_join_on_fails_with_different_column_counts(self): index=tm.makeCustomIndex(10, 2)) merge(df, df2, right_on='a', left_on=['a', 'b']) - def test_join_on_fails_with_wrong_object_type(self): - # GH12081 - wrongly_typed = [2, 'str', None, np.array([0, 1])] + @pytest.mark.parametrize("wrong_type", [2, 'str', None, np.array([0, 1])]) + def test_join_on_fails_with_wrong_object_type(self, wrong_type): + # GH12081 - original issue + # GH21220 - merging of Series and DataFrame is now allowed - # Edited the test to remove the Series object from 'wrongly_typed' - df = DataFrame({'a': [1, 1]}) + # Edited the test to remove the Series object from test parameters + # Also, parameterized the original test - for obj in wrongly_typed: - with tm.assert_raises_regex(ValueError, str(type(obj))): - merge(obj, df, left_on='a', right_on='a') - with tm.assert_raises_regex(ValueError, str(type(obj))): - merge(df, obj, left_on='a', right_on='a') + df = DataFrame({'a': [1, 1]}) + with tm.assert_raises_regex(ValueError, str(type(wrong_type))): + merge(wrong_type, df, left_on='a', right_on='a') + with tm.assert_raises_regex(ValueError, str(type(wrong_type))): + merge(df, wrong_type, left_on='a', right_on='a') def test_join_on_pass_vector(self): expected = self.target.join(self.source, on='C') diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 29630ca827ee3..97bb8fc32f45b 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1867,23 +1867,14 @@ def test_merge_index_types(index): @pytest.mark.parametrize("on,left_on,right_on,left_index,right_index,nms,nm", [ - (['outer', 'inner'], None, None, False, False, - ['outer', 'inner'], 'B'), - (None, None, None, True, True, ['outer', 'inner'], - 'B'), - (None, ['outer', 'inner'], None, False, True, None, - 'B'), - (None, None, ['outer', 'inner'], True, False, None, - 'B'), - (['outer', 'inner'], None, None, False, False, - ['outer', 'inner'], None), - (None, None, None, True, True, ['outer', 'inner'], - None), - (None, ['outer', 'inner'], None, False, True, None, - None), - (None, None, ['outer', 'inner'], True, False, None, - None), - ]) + (['outer', 'inner'], None, None, False, False, ['outer', 'inner'], 'B'), + (None, None, None, True, True, ['outer', 'inner'], 'B'), + (None, ['outer', 'inner'], None, False, True, None, 'B'), + (None, None, ['outer', 'inner'], True, False, None, 'B'), + (['outer', 'inner'], None, None, False, False, ['outer', 'inner'], None), + (None, None, None, True, True, ['outer', 'inner'], None), + (None, ['outer', 'inner'], None, False, True, None, None), + (None, None, ['outer', 'inner'], True, False, None, None)]) def test_merge_series(on, left_on, right_on, left_index, right_index, nms, nm): # GH 21220 a = pd.DataFrame({"A": [1, 2, 3, 4]}, From 7ed96889e73967ed6c46b20cc3d190a18bab9bd0 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Tue, 19 Jun 2018 22:15:04 +0530 Subject: [PATCH 11/17] Updated test_join GH21220 --- pandas/tests/reshape/merge/test_join.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index d2be67b841f08..2fe5c006e240d 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -237,9 +237,9 @@ def test_join_on_fails_with_wrong_object_type(self, wrong_type): # Also, parameterized the original test df = DataFrame({'a': [1, 1]}) - with tm.assert_raises_regex(ValueError, str(type(wrong_type))): + with tm.assert_raises_regex(TypeError, str(type(wrong_type))): merge(wrong_type, df, left_on='a', right_on='a') - with tm.assert_raises_regex(ValueError, str(type(wrong_type))): + with tm.assert_raises_regex(TypeError, str(type(wrong_type))): merge(df, wrong_type, left_on='a', right_on='a') def test_join_on_pass_vector(self): From 2ce56e7d61c9f9853731a853f67524f451f9eea6 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Wed, 20 Jun 2018 06:52:27 +0530 Subject: [PATCH 12/17] Minor update to whatsnew to force TravisCI rebuild GH21220 --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index cde6072dab0d4..44259e03ed87b 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -7,7 +7,7 @@ v0.24.0 New features ~~~~~~~~~~~~ -- :func:`merge` now directly allows merge between objects of type ``DataFrame`` and ``Series`` with a name, without need to convert the ``Series`` object into a ``DataFrame`` beforehand (:issue:`21220`) +- :func:`merge` now directly allows merge between objects of type ``DataFrame`` and ``Series`` with a name, without the need to convert the ``Series`` object into a ``DataFrame`` beforehand (:issue:`21220`) .. _whatsnew_0240.enhancements.other: From ac9d5a19b51a07ad535bca79f7c9508600968005 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Wed, 20 Jun 2018 20:38:34 +0530 Subject: [PATCH 13/17] Updating documentation GH21220 --- doc/source/merging.rst | 33 +++++++++++++++++---------------- pandas/core/frame.py | 7 ++++--- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/doc/source/merging.rst b/doc/source/merging.rst index 1161656731f88..f88739efbd27a 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -531,23 +531,23 @@ all standard database join operations between ``DataFrame`` objects: suffixes=('_x', '_y'), copy=True, indicator=False, validate=None) -- ``left``: A DataFrame object. -- ``right``: Another DataFrame object. +- ``left``: A DataFrame or named Series object. +- ``right``: Another DataFrame or named Series object. - ``on``: Column or index level names to join on. Must be found in both the left - and right DataFrame objects. If not passed and ``left_index`` and + and right DataFrame and/or Series objects. If not passed and ``left_index`` and ``right_index`` are ``False``, the intersection of the columns in the - DataFrames will be inferred to be the join keys. -- ``left_on``: Columns or index levels from the left DataFrame to use as + DataFrames and/or Series will be inferred to be the join keys. +- ``left_on``: Columns or index levels from the left DataFrame or Series to use as keys. Can either be column names, index level names, or arrays with length - equal to the length of the DataFrame. -- ``right_on``: Columns or index levels from the right DataFrame to use as + equal to the length of the DataFrame or Series. +- ``right_on``: Columns or index levels from the right DataFrame or Series to use as keys. Can either be column names, index level names, or arrays with length - equal to the length of the DataFrame. + equal to the length of the DataFrame or Series. - ``left_index``: If ``True``, use the index (row labels) from the left - DataFrame as its join key(s). In the case of a DataFrame with a MultiIndex + DataFrame or Series as its join key(s). In the case of a DataFrame or Series with a MultiIndex (hierarchical), the number of levels must match the number of join keys - from the right DataFrame. -- ``right_index``: Same usage as ``left_index`` for the right DataFrame + from the right DataFrame or Series. +- ``right_index``: Same usage as ``left_index`` for the right DataFrame or Series - ``how``: One of ``'left'``, ``'right'``, ``'outer'``, ``'inner'``. Defaults to ``inner``. See below for more detailed description of each method. - ``sort``: Sort the result DataFrame by the join keys in lexicographical @@ -555,7 +555,7 @@ all standard database join operations between ``DataFrame`` objects: substantially in many cases. - ``suffixes``: A tuple of string suffixes to apply to overlapping columns. Defaults to ``('_x', '_y')``. -- ``copy``: Always copy data (default ``True``) from the passed DataFrame +- ``copy``: Always copy data (default ``True``) from the passed DataFrame or named Series objects, even when reindexing is not necessary. Cannot be avoided in many cases but may improve performance / memory usage. The cases where copying can be avoided are somewhat pathological but this option is provided @@ -563,8 +563,8 @@ all standard database join operations between ``DataFrame`` objects: - ``indicator``: Add a column to the output DataFrame called ``_merge`` with information on the source of each row. ``_merge`` is Categorical-type and takes on a value of ``left_only`` for observations whose merge key - only appears in ``'left'`` DataFrame, ``right_only`` for observations whose - merge key only appears in ``'right'`` DataFrame, and ``both`` if the + only appears in ``'left'`` DataFrame or Series, ``right_only`` for observations whose + merge key only appears in ``'right'`` DataFrame or Series, and ``both`` if the observation's merge key is found in both. - ``validate`` : string, default None. @@ -584,9 +584,10 @@ all standard database join operations between ``DataFrame`` objects: Support for specifying index levels as the ``on``, ``left_on``, and ``right_on`` parameters was added in version 0.23.0. + Support for merging named ``Series`` objects was added in version 0.24.0. -The return type will be the same as ``left``. If ``left`` is a ``DataFrame`` -and ``right`` is a subclass of DataFrame, the return type will still be +The return type will be the same as ``left``. If ``left`` is a ``DataFrame`` or named ``Series`` +and ``right`` is a subclass of ``DataFrame``, the return type will still be ``DataFrame``. ``merge`` is a function in the pandas namespace, and it is also available as a diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d4ce8dc166b09..3c349d65e69ad 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -135,8 +135,8 @@ """ _merge_doc = """ -Merge DataFrame objects by performing a database-style join operation by -columns or indexes. +Merge DataFrame or named Series objects by performing a database-style join +operation by columns or indexes. If joining columns on columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes on indexes or indexes on a column or @@ -144,7 +144,7 @@ Parameters ----------%s -right : DataFrame +right : DataFrame or named Series how : {'left', 'right', 'outer', 'inner'}, default 'inner' * left: use only keys from left frame, similar to a SQL left outer join; preserve key order @@ -208,6 +208,7 @@ ----- Support for specifying index levels as the `on`, `left_on`, and `right_on` parameters was added in version 0.23.0 +Support for merging named Series objects was added in version 0.24.0 Examples -------- From e896a917fecd5779d4a0a574747f667272684d46 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Wed, 20 Jun 2018 20:53:29 +0530 Subject: [PATCH 14/17] Edits to rst GH21220 --- doc/source/merging.rst | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/doc/source/merging.rst b/doc/source/merging.rst index fdb347a7f347b..5ee28746c0956 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -506,8 +506,8 @@ You can also pass a list of dicts or Series: .. _merging.join: -Database-style DataFrame joining/merging ----------------------------------------- +Database-style DataFrame or named Series joining/merging +-------------------------------------------------------- pandas has full-featured, **high performance** in-memory join operations idiomatically very similar to relational databases like SQL. These methods @@ -522,7 +522,7 @@ Users who are familiar with SQL but new to pandas might be interested in a :ref:`comparison with SQL`. pandas provides a single function, :func:`~pandas.merge`, as the entry point for -all standard database join operations between ``DataFrame`` objects: +all standard database join operations between ``DataFrame`` or named ``Series`` objects: :: @@ -531,23 +531,23 @@ all standard database join operations between ``DataFrame`` objects: suffixes=('_x', '_y'), copy=True, indicator=False, validate=None) -* ``left``: A DataFrame object. -* ``right``: Another DataFrame object. +* ``left``: A DataFrame or named Series object. +* ``right``: Another DataFrame or named Series object. * ``on``: Column or index level names to join on. Must be found in both the left - and right DataFrame objects. If not passed and ``left_index`` and + and right DataFrame and/or Series objects. If not passed and ``left_index`` and ``right_index`` are ``False``, the intersection of the columns in the - DataFrames will be inferred to be the join keys. -* ``left_on``: Columns or index levels from the left DataFrame to use as + DataFrames and/or Series will be inferred to be the join keys. +* ``left_on``: Columns or index levels from the left DataFrame or Series to use as keys. Can either be column names, index level names, or arrays with length - equal to the length of the DataFrame. -* ``right_on``: Columns or index levels from the right DataFrame to use as + equal to the length of the DataFrame or Series. +* ``right_on``: Columns or index levels from the right DataFrame or Series to use as keys. Can either be column names, index level names, or arrays with length - equal to the length of the DataFrame. + equal to the length of the DataFrame or Series. * ``left_index``: If ``True``, use the index (row labels) from the left - DataFrame as its join key(s). In the case of a DataFrame with a MultiIndex + DataFrame or Series as its join key(s). In the case of a DataFrame or Series with a MultiIndex (hierarchical), the number of levels must match the number of join keys - from the right DataFrame. -* ``right_index``: Same usage as ``left_index`` for the right DataFrame + from the right DataFrame or Series. +* ``right_index``: Same usage as ``left_index`` for the right DataFrame or Series * ``how``: One of ``'left'``, ``'right'``, ``'outer'``, ``'inner'``. Defaults to ``inner``. See below for more detailed description of each method. * ``sort``: Sort the result DataFrame by the join keys in lexicographical @@ -555,7 +555,7 @@ all standard database join operations between ``DataFrame`` objects: substantially in many cases. * ``suffixes``: A tuple of string suffixes to apply to overlapping columns. Defaults to ``('_x', '_y')``. -* ``copy``: Always copy data (default ``True``) from the passed DataFrame +* ``copy``: Always copy data (default ``True``) from the passed DataFrame or named Series objects, even when reindexing is not necessary. Cannot be avoided in many cases but may improve performance / memory usage. The cases where copying can be avoided are somewhat pathological but this option is provided @@ -587,8 +587,7 @@ all standard database join operations between ``DataFrame`` objects: Support for merging named ``Series`` objects was added in version 0.24.0. The return type will be the same as ``left``. If ``left`` is a ``DataFrame`` or named ``Series`` -and ``right`` is a subclass of ``DataFrame``, the return type will still be -``DataFrame``. +and ``right`` is a subclass of ``DataFrame``, the return type will still be ``DataFrame``. ``merge`` is a function in the pandas namespace, and it is also available as a ``DataFrame`` instance method :meth:`~DataFrame.merge`, with the calling From 69d8bda85dc090db693064455df762e063f533ca Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Wed, 20 Jun 2018 20:57:19 +0530 Subject: [PATCH 15/17] Whatsnew GH21220 --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 515a9cfecf91e..f44c2f49836de 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -7,7 +7,7 @@ v0.24.0 New features ~~~~~~~~~~~~ -- :func:`merge` now directly allows merge between objects of type ``DataFrame`` and ``Series`` with a name, without the need to convert the ``Series`` object into a ``DataFrame`` beforehand (:issue:`21220`) +- :func:`merge` now directly allows merge between objects of type ``DataFrame`` and named ``Series``, without the need to convert the ``Series`` object into a ``DataFrame`` beforehand (:issue:`21220`) - ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`) From c4c988c91dc5caa337f96d8360511f64866eefd2 Mon Sep 17 00:00:00 2001 From: Kalyan Gokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Mon, 23 Jul 2018 19:29:03 +0530 Subject: [PATCH 16/17] Update test_join.py --- pandas/tests/reshape/merge/test_join.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 2fe5c006e240d..7e8ddd7eabffe 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -234,7 +234,6 @@ def test_join_on_fails_with_wrong_object_type(self, wrong_type): # GH21220 - merging of Series and DataFrame is now allowed # Edited the test to remove the Series object from test parameters - # Also, parameterized the original test df = DataFrame({'a': [1, 1]}) with tm.assert_raises_regex(TypeError, str(type(wrong_type))): From d099fd6c78db0739d9abe04bfb5c89720d234a7b Mon Sep 17 00:00:00 2001 From: Kalyan Gokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Mon, 23 Jul 2018 20:15:59 +0530 Subject: [PATCH 17/17] Force CI tests --- pandas/tests/reshape/merge/test_join.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 7e8ddd7eabffe..09f511886583c 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -233,7 +233,7 @@ def test_join_on_fails_with_wrong_object_type(self, wrong_type): # GH12081 - original issue # GH21220 - merging of Series and DataFrame is now allowed - # Edited the test to remove the Series object from test parameters + # Edited test to remove the Series object from test parameters df = DataFrame({'a': [1, 1]}) with tm.assert_raises_regex(TypeError, str(type(wrong_type))):