From 0d3ebaaf0c85c2b12e304ab6a2222050f4f2240c Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Wed, 19 Sep 2018 13:16:15 -0400 Subject: [PATCH 1/6] DOC: fix DataFrame.isin docstring and doctests --- ci/doctests.sh | 2 +- pandas/core/frame.py | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/ci/doctests.sh b/ci/doctests.sh index e7fe80e60eb6d..d914d9152fd3f 100755 --- a/ci/doctests.sh +++ b/ci/doctests.sh @@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then # DataFrame / Series docstrings pytest --doctest-modules -v pandas/core/frame.py \ - -k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" + -k"-assign -axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" if [ $? -ne "0" ]; then RET=1 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bb221ced9e6bd..dbc58edd1871c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7430,8 +7430,7 @@ def to_period(self, freq=None, axis=0, copy=True): def isin(self, values): """ - Return boolean DataFrame showing whether each element in the - DataFrame is contained in values. + Whether each element in the DataFrame is contained in values. Parameters ---------- @@ -7444,8 +7443,9 @@ def isin(self, values): Returns ------- - - DataFrame of booleans + DataFrame + DataFrame of boolean showing whether each element in the DataFrame + is contained in values. Examples -------- @@ -7458,23 +7458,24 @@ def isin(self, values): 1 False False 2 True False - When ``values`` is a dict: + When ``values`` is a dict. Note that B doesn't match the 1 here. >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [1, 4, 7]}) >>> df.isin({'A': [1, 3], 'B': [4, 7, 12]}) A B - 0 True False # Note that B didn't match the 1 here. 
+ 0 True False 1 False True 2 True True - When ``values`` is a Series or DataFrame: + When ``values`` is a Series or DataFrame. Column A in `df2` has a + 3, but not at index 1. >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']}) >>> df2 = pd.DataFrame({'A': [1, 3, 3, 2], 'B': ['e', 'f', 'f', 'e']}) >>> df.isin(df2) A B 0 True False - 1 False False # Column A in `df2` has a 3, but not at index 1. + 1 False False 2 True True """ if isinstance(values, dict): From 82530fa56b5fa38b28ce769cfda48ef1e971358c Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Sun, 23 Sep 2018 19:16:00 -0400 Subject: [PATCH 2/6] Add a better example to DataFrame.isin docstring --- pandas/core/frame.py | 54 +++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 51c7f3c92ac73..b73b71e59bf53 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7455,10 +7455,10 @@ def isin(self, values): Parameters ---------- - values : iterable, Series, DataFrame or dictionary + values : iterable, Series, DataFrame or dict The result will only be true at a location if all the labels match. If `values` is a Series, that's the index. If - `values` is a dictionary, the keys must be the column names, + `values` is a dict, the keys must be the column names, which must match. If `values` is a DataFrame, then both the index and column labels must match. @@ -7468,36 +7468,44 @@ def isin(self, values): DataFrame of boolean showing whether each element in the DataFrame is contained in values. + See Also + -------- + DataFrame.eq: Equality test for DataFrame. + Series.isin: Equivalent method on Series. + Examples -------- + + >>> df = pd.DataFrame({'num_legs': [2, 4], 'num_wings': [2, 0]}, + ... 
index=['falcon', 'dog']) + >>> df + num_legs num_wings + falcon 2 2 + dog 4 0 + When ``values`` is a list: - >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']}) - >>> df.isin([1, 3, 12, 'a']) - A B - 0 True True - 1 False False - 2 True False + >>> df.isin([2]) + num_legs num_wings + falcon True True + dog False False - When ``values`` is a dict. Note that B doesn't match the 1 here. + When ``values`` is a dict. - >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [1, 4, 7]}) - >>> df.isin({'A': [1, 3], 'B': [4, 7, 12]}) - A B - 0 True False - 1 False True - 2 True True + >>> df.isin({'num_wings': [0, 3], 'num_legs': [0]}) + num_legs num_wings + falcon False False + dog False True - When ``values`` is a Series or DataFrame. Column A in `df2` has a - 3, but not at index 1. + When ``values`` is a Series or DataFrame. Note that 'falcon' does not + match based on the number of legs in df2. - >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']}) - >>> df2 = pd.DataFrame({'A': [1, 3, 3, 2], 'B': ['e', 'f', 'f', 'e']}) + >>> df2 = pd.DataFrame({'num_legs': [8, 0, 2], 'num_wings': [0, 2, 2]}, + ... 
index=['spider', 'falcon', 'parrot']) >>> df.isin(df2) - A B - 0 True False - 1 False False - 2 True True + num_legs num_wings + falcon False True + dog False False """ if isinstance(values, dict): from pandas.core.reshape.concat import concat From b5af7883d28745f2adfc14adc3961ba5a1db3fdf Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Mon, 24 Sep 2018 10:29:27 -0400 Subject: [PATCH 3/6] Add more text to examples of DataFrame.isin docstring --- pandas/core/frame.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b73b71e59bf53..bb9d7366f83f2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7465,13 +7465,14 @@ def isin(self, values): Returns ------- DataFrame - DataFrame of boolean showing whether each element in the DataFrame + DataFrame of booleans showing whether each element in the DataFrame is contained in values. See Also -------- DataFrame.eq: Equality test for DataFrame. Series.isin: Equivalent method on Series. + Series.str.isin: Examples -------- @@ -7490,15 +7491,17 @@ def isin(self, values): falcon True True dog False False - When ``values`` is a dict. + When ``values`` is a dict, we can pass values to check for each + column separately: >>> df.isin({'num_wings': [0, 3], 'num_legs': [0]}) num_legs num_wings falcon False False dog False True - When ``values`` is a Series or DataFrame. Note that 'falcon' does not - match based on the number of legs in df2. + When ``values`` is a Series or DataFrame the index and column must + match. Note that 'falcon' does not match based on the number of legs + in df2. >>> df2 = pd.DataFrame({'num_legs': [8, 0, 2], 'num_wings': [0, 2, 2]}, ... 
index=['spider', 'falcon', 'parrot']) From a781439663c6cc2005dac923ee1c3503bbfccb79 Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Mon, 24 Sep 2018 10:30:07 -0400 Subject: [PATCH 4/6] Remove Series.str.isin until we find a description --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bb9d7366f83f2..2fa08dfb43462 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7472,7 +7472,6 @@ def isin(self, values): -------- DataFrame.eq: Equality test for DataFrame. Series.isin: Equivalent method on Series. - Series.str.isin: Examples -------- From e59e69f7b339627ea27f4e59c2dffe5378b9172b Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Mon, 24 Sep 2018 10:54:53 -0400 Subject: [PATCH 5/6] Add Series.str.contains to the See Also section --- pandas/core/frame.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2fa08dfb43462..e7eec833c2465 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7472,6 +7472,8 @@ def isin(self, values): -------- DataFrame.eq: Equality test for DataFrame. Series.isin: Equivalent method on Series. + Series.str.contains: Test if pattern or regex is contained within a + string of a Series or Index. 
Examples -------- From 938a21f35fb6d6726d7cb20aabc812fec0f67b4a Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Mon, 24 Sep 2018 21:47:54 -0400 Subject: [PATCH 6/6] Further improvement to the examples --- pandas/core/frame.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e7eec833c2465..721c31c57bc06 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7485,17 +7485,18 @@ def isin(self, values): falcon 2 2 dog 4 0 - When ``values`` is a list: + When ``values`` is a list, check whether every value in the DataFrame + is present in the list (which animals have 0 or 2 legs or wings): - >>> df.isin([2]) + >>> df.isin([0, 2]) num_legs num_wings falcon True True - dog False False + dog False True When ``values`` is a dict, we can pass values to check for each column separately: - >>> df.isin({'num_wings': [0, 3], 'num_legs': [0]}) + >>> df.isin({'num_wings': [0, 3]}) num_legs num_wings falcon False False dog False True @@ -7504,11 +7505,11 @@ def isin(self, values): match. Note that 'falcon' does not match based on the number of legs in df2. - >>> df2 = pd.DataFrame({'num_legs': [8, 0, 2], 'num_wings': [0, 2, 2]}, - ... index=['spider', 'falcon', 'parrot']) - >>> df.isin(df2) + >>> other = pd.DataFrame({'num_legs': [8, 2], 'num_wings': [0, 2]}, + ... index=['spider', 'falcon']) + >>> df.isin(other) num_legs num_wings - falcon False True + falcon True True dog False False """ if isinstance(values, dict):