From cf5669862842c6a7042056f32d3e70c3ec4b7472 Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sun, 30 Sep 2018 00:12:20 -0700 Subject: [PATCH 1/7] DOC GH22897 Fix docstring of join in pandas/core/frame.py --- pandas/core/frame.py | 93 ++++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 47 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b4e8b4e3a6bec..3f72d1f9aab29 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6440,6 +6440,8 @@ def append(self, other, ignore_index=False, def join(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False): """ + Append columns of another DataFrame. + Join columns with other DataFrame either on index or on a key column. Efficiently Join multiple DataFrame objects by index at once by passing a list. @@ -6449,31 +6451,31 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', other : DataFrame, Series with name field set, or list of DataFrame Index should be similar to one of the columns in this one. If a Series is passed, its name attribute must be set, and that will be - used as the column name in the resulting joined DataFrame + used as the column name in the resulting joined DataFrame. on : name, tuple/list of names, or array-like Column or index level name(s) in the caller to join on the index in `other`, otherwise joins index-on-index. If multiple values given, the `other` DataFrame must have a MultiIndex. Can pass an array as the join key if it is not already contained in - the calling DataFrame. Like an Excel VLOOKUP operation + the calling DataFrame. Like an Excel VLOOKUP operation. how : {'left', 'right', 'outer', 'inner'}, default: 'left' How to handle the operation of the two objects. * left: use calling frame's index (or column if on is specified) - * right: use other frame's index + * right: use other frame's index. 
* outer: form union of calling frame's index (or column if on is - specified) with other frame's index, and sort it - lexicographically + specified) with other frame's index, and sort it. + lexicographically. * inner: form intersection of calling frame's index (or column if on is specified) with other frame's index, preserving the order - of the calling's one + of the calling's one. lsuffix : string - Suffix to use from left frame's overlapping columns + Suffix to use from left frame's overlapping columns. rsuffix : string - Suffix to use from right frame's overlapping columns + Suffix to use from right frame's overlapping columns. sort : boolean, default False Order result DataFrame lexicographically by the join key. If False, - the order of the join key depends on the join type (how keyword) + the order of the join key depends on the join type (how keyword). Notes ----- @@ -6485,54 +6487,53 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', Examples -------- + >>> import pandas as pd + >>> caller = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'], ... 'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5']}) >>> caller - A key - 0 A0 K0 - 1 A1 K1 - 2 A2 K2 - 3 A3 K3 - 4 A4 K4 - 5 A5 K5 + key A + 0 K0 A0 + 1 K1 A1 + 2 K2 A2 + 3 K3 A3 + 4 K4 A4 + 5 K5 A5 >>> other = pd.DataFrame({'key': ['K0', 'K1', 'K2'], ... 'B': ['B0', 'B1', 'B2']}) >>> other - B key - 0 B0 K0 - 1 B1 K1 - 2 B2 K2 + key B + 0 K0 B0 + 1 K1 B1 + 2 K2 B2 Join DataFrames using their indexes. >>> caller.join(other, lsuffix='_caller', rsuffix='_other') - - >>> A key_caller B key_other - 0 A0 K0 B0 K0 - 1 A1 K1 B1 K1 - 2 A2 K2 B2 K2 - 3 A3 K3 NaN NaN - 4 A4 K4 NaN NaN - 5 A5 K5 NaN NaN - + key_caller A key_other B + 0 K0 A0 K0 B0 + 1 K1 A1 K1 B1 + 2 K2 A2 K2 B2 + 3 K3 A3 NaN NaN + 4 K4 A4 NaN NaN + 5 K5 A5 NaN NaN If we want to join using the key columns, we need to set key to be the index in both caller and other. The joined DataFrame will have key as its index. 
>>> caller.set_index('key').join(other.set_index('key')) - - >>> A B - key - K0 A0 B0 - K1 A1 B1 - K2 A2 B2 - K3 A3 NaN - K4 A4 NaN - K5 A5 NaN + A B + key + K0 A0 B0 + K1 A1 B1 + K2 A2 B2 + K3 A3 NaN + K4 A4 NaN + K5 A5 NaN Another option to join using the key columns is to use the on parameter. DataFrame.join always uses other's index but we can use any @@ -6540,15 +6541,13 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', index in the result. >>> caller.join(other.set_index('key'), on='key') - - >>> A key B - 0 A0 K0 B0 - 1 A1 K1 B1 - 2 A2 K2 B2 - 3 A3 K3 NaN - 4 A4 K4 NaN - 5 A5 K5 NaN - + key A B + 0 K0 A0 B0 + 1 K1 A1 B1 + 2 K2 A2 B2 + 3 K3 A3 NaN + 4 K4 A4 NaN + 5 K5 A5 NaN See also -------- From 988c40c9a44f5a951aa69318abc4c7a2a54e93bb Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sun, 30 Sep 2018 09:35:15 -0700 Subject: [PATCH 2/7] DOC GH22897 Fix other formatting issues in pandas/core/frame.py --- pandas/core/frame.py | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3f72d1f9aab29..5e9990dd1fc54 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6440,7 +6440,7 @@ def append(self, other, ignore_index=False, def join(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False): """ - Append columns of another DataFrame. + Join columns of another DataFrame. Join columns with other DataFrame either on index or on a key column. Efficiently Join multiple DataFrame objects by index at once by @@ -6448,11 +6448,11 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', Parameters ---------- - other : DataFrame, Series with name field set, or list of DataFrame + other : DataFrame, Series, or list of DataFrame Index should be similar to one of the columns in this one. If a Series is passed, its name attribute must be set, and that will be used as the column name in the resulting joined DataFrame. 
- on : name, tuple/list of names, or array-like + on : str, list of str, or array-like Column or index level name(s) in the caller to join on the index in `other`, otherwise joins index-on-index. If multiple values given, the `other` DataFrame must have a MultiIndex. Can @@ -6469,11 +6469,11 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', * inner: form intersection of calling frame's index (or column if on is specified) with other frame's index, preserving the order of the calling's one. - lsuffix : string + lsuffix : str Suffix to use from left frame's overlapping columns. - rsuffix : string + rsuffix : str Suffix to use from right frame's overlapping columns. - sort : boolean, default False + sort : bool, default False Order result DataFrame lexicographically by the join key. If False, the order of the join key depends on the join type (how keyword). @@ -6485,14 +6485,17 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', Support for specifying index levels as the `on` parameter was added in version 0.23.0 + See Also + -------- + DataFrame.merge : For column(s)-on-columns(s) operations. + Examples -------- - >>> import pandas as pd - >>> caller = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'], + >>> df = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'], ... 'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5']}) - >>> caller + >>> df key A 0 K0 A0 1 K1 A1 @@ -6512,7 +6515,7 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', Join DataFrames using their indexes. - >>> caller.join(other, lsuffix='_caller', rsuffix='_other') + >>> df.join(other, lsuffix='_caller', rsuffix='_other') key_caller A key_other B 0 K0 A0 K0 B0 1 K1 A1 K1 B1 @@ -6522,10 +6525,10 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', 5 K5 A5 NaN NaN If we want to join using the key columns, we need to set key to be - the index in both caller and other. 
The joined DataFrame will have + the index in both df and other. The joined DataFrame will have key as its index. - >>> caller.set_index('key').join(other.set_index('key')) + >>> df.set_index('key').join(other.set_index('key')) A B key K0 A0 B0 @@ -6537,10 +6540,10 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', Another option to join using the key columns is to use the on parameter. DataFrame.join always uses other's index but we can use any - column in the caller. This method preserves the original caller's + column in df. This method preserves the original DataFrame's index in the result. - >>> caller.join(other.set_index('key'), on='key') + >>> df.join(other.set_index('key'), on='key') key A B 0 K0 A0 B0 1 K1 A1 B1 @@ -6549,10 +6552,6 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', 4 K4 A4 NaN 5 K5 A5 NaN - See also - -------- - DataFrame.merge : For column(s)-on-columns(s) operations - Returns ------- joined : DataFrame From 8139490422a8a5c97c6bf2cb97774c9a04ab3ae1 Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sun, 30 Sep 2018 10:24:58 -0700 Subject: [PATCH 3/7] DOC GH22897 Fix additional formatting issues in pandas/core/frame.py --- pandas/core/frame.py | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5e9990dd1fc54..a5add27ed30d8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6442,8 +6442,8 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', """ Join columns of another DataFrame. - Join columns with other DataFrame either on index or on a key - column. Efficiently Join multiple DataFrame objects by index at once by + Join columns with `other` DataFrame either on index or on a key + column. Efficiently join multiple DataFrame objects by index at once by passing a list. 
Parameters @@ -6452,38 +6452,43 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', Index should be similar to one of the columns in this one. If a Series is passed, its name attribute must be set, and that will be used as the column name in the resulting joined DataFrame. - on : str, list of str, or array-like + on : str, list of str, or array-like, optional Column or index level name(s) in the caller to join on the index in `other`, otherwise joins index-on-index. If multiple values given, the `other` DataFrame must have a MultiIndex. Can pass an array as the join key if it is not already contained in the calling DataFrame. Like an Excel VLOOKUP operation. - how : {'left', 'right', 'outer', 'inner'}, default: 'left' + how : {'left', 'right', 'outer', 'inner'}, default 'left' How to handle the operation of the two objects. * left: use calling frame's index (or column if on is specified) - * right: use other frame's index. + * right: use `other`'s index. * outer: form union of calling frame's index (or column if on is - specified) with other frame's index, and sort it. + specified) with `other`'s index, and sort it. lexicographically. * inner: form intersection of calling frame's index (or column if - on is specified) with other frame's index, preserving the order + on is specified) with `other`'s index, preserving the order of the calling's one. - lsuffix : str + lsuffix : str, default '' Suffix to use from left frame's overlapping columns. - rsuffix : str + rsuffix : str, default '' Suffix to use from right frame's overlapping columns. sort : bool, default False Order result DataFrame lexicographically by the join key. If False, the order of the join key depends on the join type (how keyword). + Returns + ------- + DataFrame + A dataframe containing columns from both the caller and `other`. 
+ Notes ----- - on, lsuffix, and rsuffix options are not supported when passing a list - of DataFrame objects + Options `on`, `lsuffix`, and `rsuffix` options are not supported + when passing a list of DataFrame objects. Support for specifying index levels as the `on` parameter was added - in version 0.23.0 + in version 0.23.0. See Also -------- @@ -6525,7 +6530,7 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', 5 K5 A5 NaN NaN If we want to join using the key columns, we need to set key to be - the index in both df and other. The joined DataFrame will have + the index in both `df` and `other`. The joined DataFrame will have key as its index. >>> df.set_index('key').join(other.set_index('key')) @@ -6538,9 +6543,9 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', K4 A4 NaN K5 A5 NaN - Another option to join using the key columns is to use the on - parameter. DataFrame.join always uses other's index but we can use any - column in df. This method preserves the original DataFrame's + Another option to join using the key columns is to use the `on` + parameter. DataFrame.join always uses `other`'s index but we can use + any column in `df`. This method preserves the original DataFrame's index in the result. 
>>> df.join(other.set_index('key'), on='key') @@ -6551,10 +6556,6 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', 3 K3 A3 NaN 4 K4 A4 NaN 5 K5 A5 NaN - - Returns - ------- - joined : DataFrame """ # For SparseDataFrame's benefit return self._join_compat(other, on=on, how=how, lsuffix=lsuffix, From 762f66d9fc802d2d241300cc350c3262865e7526 Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sun, 30 Sep 2018 10:29:39 -0700 Subject: [PATCH 4/7] DOC GH22897 Modified previous commit to use double backticks when appropriate --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a5add27ed30d8..b6375031f09b8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6530,7 +6530,7 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', 5 K5 A5 NaN NaN If we want to join using the key columns, we need to set key to be - the index in both `df` and `other`. The joined DataFrame will have + the index in both ``df`` and `other`. The joined DataFrame will have key as its index. >>> df.set_index('key').join(other.set_index('key')) @@ -6545,7 +6545,7 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', Another option to join using the key columns is to use the `on` parameter. DataFrame.join always uses `other`'s index but we can use - any column in `df`. This method preserves the original DataFrame's + any column in ``df``. This method preserves the original DataFrame's index in the result. >>> df.join(other.set_index('key'), on='key') From 19725a4b468039618357b704b101786653ceaace Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sun, 30 Sep 2018 10:56:41 -0700 Subject: [PATCH 5/7] Revert "DOC GH22897 Fix additional formatting issues in pandas/core/frame.py" This reverts commit 8139490422a8a5c97c6bf2cb97774c9a04ab3ae1. 
--- pandas/core/frame.py | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a5add27ed30d8..5e9990dd1fc54 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6442,8 +6442,8 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', """ Join columns of another DataFrame. - Join columns with `other` DataFrame either on index or on a key - column. Efficiently join multiple DataFrame objects by index at once by + Join columns with other DataFrame either on index or on a key + column. Efficiently Join multiple DataFrame objects by index at once by passing a list. Parameters @@ -6452,43 +6452,38 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', Index should be similar to one of the columns in this one. If a Series is passed, its name attribute must be set, and that will be used as the column name in the resulting joined DataFrame. - on : str, list of str, or array-like, optional + on : str, list of str, or array-like Column or index level name(s) in the caller to join on the index in `other`, otherwise joins index-on-index. If multiple values given, the `other` DataFrame must have a MultiIndex. Can pass an array as the join key if it is not already contained in the calling DataFrame. Like an Excel VLOOKUP operation. - how : {'left', 'right', 'outer', 'inner'}, default 'left' + how : {'left', 'right', 'outer', 'inner'}, default: 'left' How to handle the operation of the two objects. * left: use calling frame's index (or column if on is specified) - * right: use `other`'s index. + * right: use other frame's index. * outer: form union of calling frame's index (or column if on is - specified) with `other`'s index, and sort it. + specified) with other frame's index, and sort it. lexicographically. 
* inner: form intersection of calling frame's index (or column if - on is specified) with `other`'s index, preserving the order + on is specified) with other frame's index, preserving the order of the calling's one. - lsuffix : str, default '' + lsuffix : str Suffix to use from left frame's overlapping columns. - rsuffix : str, default '' + rsuffix : str Suffix to use from right frame's overlapping columns. sort : bool, default False Order result DataFrame lexicographically by the join key. If False, the order of the join key depends on the join type (how keyword). - Returns - ------- - DataFrame - A dataframe containing columns from both the caller and `other`. - Notes ----- - Options `on`, `lsuffix`, and `rsuffix` options are not supported - when passing a list of DataFrame objects. + on, lsuffix, and rsuffix options are not supported when passing a list + of DataFrame objects Support for specifying index levels as the `on` parameter was added - in version 0.23.0. + in version 0.23.0 See Also -------- @@ -6530,7 +6525,7 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', 5 K5 A5 NaN NaN If we want to join using the key columns, we need to set key to be - the index in both `df` and `other`. The joined DataFrame will have + the index in both df and other. The joined DataFrame will have key as its index. >>> df.set_index('key').join(other.set_index('key')) @@ -6543,9 +6538,9 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', K4 A4 NaN K5 A5 NaN - Another option to join using the key columns is to use the `on` - parameter. DataFrame.join always uses `other`'s index but we can use - any column in `df`. This method preserves the original DataFrame's + Another option to join using the key columns is to use the on + parameter. DataFrame.join always uses other's index but we can use any + column in df. This method preserves the original DataFrame's index in the result. 
>>> df.join(other.set_index('key'), on='key') @@ -6556,6 +6551,10 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', 3 K3 A3 NaN 4 K4 A4 NaN 5 K5 A5 NaN + + Returns + ------- + joined : DataFrame """ # For SparseDataFrame's benefit return self._join_compat(other, on=on, how=how, lsuffix=lsuffix, From 32b475ddc77579e847aaa6a0d846d741636f1977 Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sun, 30 Sep 2018 10:58:50 -0700 Subject: [PATCH 6/7] Revert "Revert "DOC GH22897 Fix additional formatting issues in pandas/core/frame.py"" This reverts commit 19725a4b468039618357b704b101786653ceaace. --- pandas/core/frame.py | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5e9990dd1fc54..a5add27ed30d8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6442,8 +6442,8 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', """ Join columns of another DataFrame. - Join columns with other DataFrame either on index or on a key - column. Efficiently Join multiple DataFrame objects by index at once by + Join columns with `other` DataFrame either on index or on a key + column. Efficiently join multiple DataFrame objects by index at once by passing a list. Parameters @@ -6452,38 +6452,43 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', Index should be similar to one of the columns in this one. If a Series is passed, its name attribute must be set, and that will be used as the column name in the resulting joined DataFrame. - on : str, list of str, or array-like + on : str, list of str, or array-like, optional Column or index level name(s) in the caller to join on the index in `other`, otherwise joins index-on-index. If multiple values given, the `other` DataFrame must have a MultiIndex. Can pass an array as the join key if it is not already contained in the calling DataFrame. Like an Excel VLOOKUP operation. 
- how : {'left', 'right', 'outer', 'inner'}, default: 'left' + how : {'left', 'right', 'outer', 'inner'}, default 'left' How to handle the operation of the two objects. * left: use calling frame's index (or column if on is specified) - * right: use other frame's index. + * right: use `other`'s index. * outer: form union of calling frame's index (or column if on is - specified) with other frame's index, and sort it. + specified) with `other`'s index, and sort it lexicographically. * inner: form intersection of calling frame's index (or column if - on is specified) with other frame's index, preserving the order + on is specified) with `other`'s index, preserving the order of the calling's one. - lsuffix : str + lsuffix : str, default '' Suffix to use from left frame's overlapping columns. - rsuffix : str + rsuffix : str, default '' Suffix to use from right frame's overlapping columns. sort : bool, default False Order result DataFrame lexicographically by the join key. If False, the order of the join key depends on the join type (how keyword). + Returns + ------- + DataFrame + A dataframe containing columns from both the caller and `other`. + Notes ----- - on, lsuffix, and rsuffix options are not supported when passing a list - of DataFrame objects + Options `on`, `lsuffix`, and `rsuffix` are not supported + when passing a list of DataFrame objects. Support for specifying index levels as the `on` parameter was added - in version 0.23.0 + in version 0.23.0. See Also -------- @@ -6525,7 +6530,7 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', 5 K5 A5 NaN NaN If we want to join using the key columns, we need to set key to be - the index in both df and other. The joined DataFrame will have + the index in both `df` and `other`. The joined DataFrame will have key as its index. 
>>> df.set_index('key').join(other.set_index('key')) @@ -6538,9 +6543,9 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', K4 A4 NaN K5 A5 NaN - Another option to join using the key columns is to use the on - parameter. DataFrame.join always uses other's index but we can use any - column in df. This method preserves the original DataFrame's + Another option to join using the key columns is to use the `on` + parameter. DataFrame.join always uses `other`'s index but we can use + any column in `df`. This method preserves the original DataFrame's index in the result. >>> df.join(other.set_index('key'), on='key') @@ -6551,10 +6556,6 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', 3 K3 A3 NaN 4 K4 A4 NaN 5 K5 A5 NaN - - Returns - ------- - joined : DataFrame """ # For SparseDataFrame's benefit return self._join_compat(other, on=on, how=how, lsuffix=lsuffix, From 05988d4e782a04d64fc70430c9d80840861358c0 Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sun, 30 Sep 2018 11:01:00 -0700 Subject: [PATCH 7/7] DOC GH22897 Undo previous commit --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b6375031f09b8..a5add27ed30d8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6530,7 +6530,7 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', 5 K5 A5 NaN NaN If we want to join using the key columns, we need to set key to be - the index in both ``df`` and `other`. The joined DataFrame will have + the index in both `df` and `other`. The joined DataFrame will have key as its index. >>> df.set_index('key').join(other.set_index('key')) @@ -6545,7 +6545,7 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', Another option to join using the key columns is to use the `on` parameter. DataFrame.join always uses `other`'s index but we can use - any column in ``df``. 
This method preserves the original DataFrame's + any column in `df`. This method preserves the original DataFrame's index in the result. >>> df.join(other.set_index('key'), on='key')