diff --git a/ci/doctests.sh b/ci/doctests.sh index 48774a1e4d00d..b3d7f6785815a 100755 --- a/ci/doctests.sh +++ b/ci/doctests.sh @@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then # DataFrame / Series docstrings pytest --doctest-modules -v pandas/core/frame.py \ - -k"-axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" + -k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" if [ $? -ne "0" ]; then RET=1 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 81d5c112885ec..721c31c57bc06 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7451,52 +7451,66 @@ def to_period(self, freq=None, axis=0, copy=True): def isin(self, values): """ - Return boolean DataFrame showing whether each element in the - DataFrame is contained in values. + Whether each element in the DataFrame is contained in values. Parameters ---------- - values : iterable, Series, DataFrame or dictionary + values : iterable, Series, DataFrame or dict The result will only be true at a location if all the labels match. If `values` is a Series, that's the index. If - `values` is a dictionary, the keys must be the column names, + `values` is a dict, the keys must be the column names, which must match. If `values` is a DataFrame, then both the index and column labels must match. Returns ------- + DataFrame + DataFrame of booleans showing whether each element in the DataFrame + is contained in values. - DataFrame of booleans + See Also + -------- + DataFrame.eq: Equality test for DataFrame. + Series.isin: Equivalent method on Series. + Series.str.contains: Test if pattern or regex is contained within a + string of a Series or Index. 
Examples -------- - When ``values`` is a list: - - >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']}) - >>> df.isin([1, 3, 12, 'a']) - A B - 0 True True - 1 False False - 2 True False - - When ``values`` is a dict: - - >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [1, 4, 7]}) - >>> df.isin({'A': [1, 3], 'B': [4, 7, 12]}) - A B - 0 True False # Note that B didn't match the 1 here. - 1 False True - 2 True True - - When ``values`` is a Series or DataFrame: - - >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']}) - >>> df2 = pd.DataFrame({'A': [1, 3, 3, 2], 'B': ['e', 'f', 'f', 'e']}) - >>> df.isin(df2) - A B - 0 True False - 1 False False # Column A in `df2` has a 3, but not at index 1. - 2 True True + + >>> df = pd.DataFrame({'num_legs': [2, 4], 'num_wings': [2, 0]}, + ... index=['falcon', 'dog']) + >>> df + num_legs num_wings + falcon 2 2 + dog 4 0 + + When ``values`` is a list, check whether every value in the DataFrame + is present in the list (which animals have 0 or 2 legs or wings): + + >>> df.isin([0, 2]) + num_legs num_wings + falcon True True + dog False True + + When ``values`` is a dict, we can pass values to check for each + column separately: + + >>> df.isin({'num_wings': [0, 3]}) + num_legs num_wings + falcon False False + dog False True + + When ``values`` is a Series or DataFrame, the index and column labels + must match. Note that 'dog' does not match because it is missing from + the index of ``other``. + + >>> other = pd.DataFrame({'num_legs': [8, 2], 'num_wings': [0, 2]}, + ... index=['spider', 'falcon']) + >>> df.isin(other) + num_legs num_wings + falcon True True + dog False False """ if isinstance(values, dict): from pandas.core.reshape.concat import concat