From 0309899dd7855969850c217f2b4a510602dd2aac Mon Sep 17 00:00:00 2001
From: TomAugspurger <thomas-augspurger@uiowa.edu>
Date: Mon, 15 Jul 2013 17:47:59 -0500
Subject: [PATCH 1/3] ENH: Add `isin` method to DataFrame.

docs. to be rebased

ENH: Add isin method to DataFrame

Basic tests.

Added method and fixed tests.

ENH: Add ordered argument to df.isin()

Expects a sequence of arrays.

Updated release notes for df.isin()

CLN: cleanup

going to remove ordered argument.

Using a dict for ordered matching. Docs

BUG: fixed subselection length check issues.

Updated release notes for df.isin()

remove merge conflict note
---
 doc/source/indexing.rst    | 30 +++++++++++++++++++++++
 doc/source/release.rst     |  1 +
 pandas/core/frame.py       | 32 ++++++++++++++++++++++++
 pandas/tests/test_frame.py | 50 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 113 insertions(+)

diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
index e8d9fd52cf352..4f8fc5e78ece3 100644
--- a/doc/source/indexing.rst
+++ b/doc/source/indexing.rst
@@ -456,6 +456,36 @@ and :ref:`Advanced Indexing <indexing.advanced>` you may select along more than
 
    df2.loc[criterion & (df2['b'] == 'x'),'b':'c']
 
+*New in 0.12.0*
+
+DataFrame also has an ``isin`` method.  When calling ``isin``, pass a set of
+values as either an array or dict.  If values is just an array, ``isin`` returns
+a DataFrame of booleans that is the same shape as the original DataFrame, with Trues
+wherever the element is in the sequence of values.
+
+.. ipython:: python
+
+   df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'],
+                   'ids2': ['a', 'n', 'c', 'n']})
+
+   values = ['a', 'b', 1, 3]
+
+   df.isin(values)
+
+Oftentimes you'll want to match certain values with certain columns or rows.
+Just make values a ``dict`` where the key is the row or column, and the value is
+a list of items you want to check for.  Make sure to set axis equal to 0 for
+row-wise or 1 for column-wise matching.
+
+.. ipython:: python
+
+   df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'],
+                   'ids2': ['a', 'n', 'c', 'n']})
+
+   values = {'ids': ['a', 'b'], 'vals': [1, 3]}
+
+   df.isin(values, axis=1)
+
 Where and Masking
 ~~~~~~~~~~~~~~~~~
 
diff --git a/doc/source/release.rst b/doc/source/release.rst
index 3b7d25789aa40..d03cdac14676a 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -54,6 +54,7 @@ pandas 0.12
   - Access to historical Google Finance data in pandas.io.data (:issue:`3814`)
   - DataFrame plotting methods can sample column colors from a Matplotlib
     colormap via the ``colormap`` keyword. (:issue:`3860`)
+  - Added ``isin`` method to DataFrame (:issue:`4211`)
 
 **Improvements to existing features**
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 401a7746953cb..702baa9550a00 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5481,6 +5481,38 @@ def to_period(self, freq=None, axis=0, copy=True):
 
         return self._constructor(new_data)
 
+    def isin(self, values, axis=None):
+        """
+        Return boolean vector showing whether elements in the DataFrame are
+        exactly contained in the passed sequence of values.
+
+        Parameters
+        ----------
+        values : sequence (array-like) or dict of {label: sequence}.
+        axis : {None, 0, 1}
+            Compute isin row-wise (axis=0) or column-wise (axis=1)
+            Mandatory if values is a dict, ignored otherwise.
+
+        Returns
+        -------
+
+        bools : Series of booleans
+        """
+        if not isinstance(values, dict):
+            return self.applymap(values.__contains__)
+
+        else:
+            from pandas.tools.merge import concat
+            if axis == 1:
+                return concat((self[col].isin(vals) for col, vals in
+                               values.iteritems()), axis=1)
+            elif axis == 0:
+                return concat((self.loc[row].isin(vals) for row, vals in
+                               values.iteritems()), axis=1).T
+            else:
+                raise TypeError('Axis must be "0" or "1" when values is a dict '
+                                'Got "%s" instead.' % str(axis))
+
     #----------------------------------------------------------------------
     # Deprecated stuff
 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index a9df56a498f63..07aa4fd13e1a1 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -10633,6 +10633,56 @@ def _check_f(base, f):
         f = lambda x: x.rename({1: 'foo'}, inplace=True)
         _check_f(data.copy()['c'], f)
 
+    def test_isin(self):
+        # GH #4211
+        df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'],
+                        'ids2': ['a', 'n', 'c', 'n']},
+                        index=['foo', 'bar', 'baz', 'qux'])
+        other = ['a', 'b', 'c']
+        result_none = df[['ids', 'ids2']].isin(other)
+        expected_none = DataFrame({'ids':  [True, True, False, False],
+                                  'ids2': [True, False, True, False]},
+                                  index=['foo', 'bar', 'baz', 'qux'])
+
+        assert_frame_equal(result_none, expected_none)
+
+        # axis = None
+        result_none_full = df.isin(other)
+        expected_none_full = DataFrame({'ids': [True, True, False, False],
+                                        'ids2': [True, False, True, False],
+                                        'vals': [False, False, False, False]},
+                                        index=['foo', 'bar', 'baz', 'qux'])
+
+        assert_frame_equal(result_none_full, expected_none_full)
+
+    def test_isin_dict(self):
+        df = DataFrame({'A': ['a', 'b', 'c', 'd'], 'B': [1, 2, 3, 4],
+                        'C': [1, 5, 7, 8]},
+                        index=['foo', 'bar', 'baz', 'qux'])
+        other = {'A': ('a', 'b'), 'B': (1, 3)}
+        result = df.isin(other, axis=1)
+        expected = DataFrame({'A': [True, True, False, False],
+                              'B': [True, False, True, False]},
+                              index=['foo', 'bar', 'baz', 'qux'])
+        assert_frame_equal(result, expected)
+
+    def test_isin_row(self):
+        df = DataFrame({'A': ['a', 'b', 'c', 'd'], 'B': [1, 2, 3, 4],
+                        'C': [1, 5, 7, 8]},
+                        index=['foo', 'bar', 'baz', 'qux'])
+        ind_other = {'foo': ['a', 1, 1],
+                     'bar': ['d', 2, 1],
+                     'baz': ['nn', 'nn', 'nn']}
+
+        result_ind = df.isin(ind_other, axis=0)
+        expected_ind = DataFrame({'A': [True, False, False],
+                                  'B': [True, True, False],
+                                  'C': [True, False, False]},
+                            index=['foo', 'bar', 'baz']).reindex_like(result_ind)
+
+        assert_frame_equal(result_ind, expected_ind)
+
+        self.assertRaises(TypeError, df.isin, ind_other)
 
 if __name__ == '__main__':
     # unittest.main()

From ab1b17e54bbe8e6de62632b3c0a222eb89c43c46 Mon Sep 17 00:00:00 2001
From: Andy Hayden <andyhayden1@gmail.com>
Date: Tue, 16 Jul 2013 01:13:05 +0100
Subject: [PATCH 2/3] ENH tweak DataFrame isin method

---
 doc/source/indexing.rst    | 15 ++++------
 pandas/core/frame.py       | 36 +++++++++++-------------
 pandas/tests/test_frame.py | 56 ++++++++++++++------------------------
 3 files changed, 42 insertions(+), 65 deletions(-)

diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
index 4f8fc5e78ece3..d3d6fe367a0bd 100644
--- a/doc/source/indexing.rst
+++ b/doc/source/indexing.rst
@@ -456,11 +456,9 @@ and :ref:`Advanced Indexing <indexing.advanced>` you may select along more than
 
    df2.loc[criterion & (df2['b'] == 'x'),'b':'c']
 
-*New in 0.12.0*
-
 DataFrame also has an ``isin`` method.  When calling ``isin``, pass a set of
-values as either an array or dict.  If values is just an array, ``isin`` returns
-a DataFrame of booleans that is the same shape as the original DataFrame, with Trues
+values as either an array or dict.  If values is an array, ``isin`` returns
+a DataFrame of booleans that is the same shape as the original DataFrame, with True
 wherever the element is in the sequence of values.
 
 .. ipython:: python
@@ -472,10 +470,9 @@ wherever the element is in the sequence of values.
 
    df.isin(values)
 
-Oftentimes you'll want to match certain values with certain columns or rows.
-Just make values a ``dict`` where the key is the row or column, and the value is
-a list of items you want to check for.  Make sure to set axis equal to 0 for
-row-wise or 1 for column-wise matching.
+Oftentimes you'll want to match certain values with certain columns.
+Just make values a ``dict`` where the key is the column, and the value is
+a list of items you want to check for.
 
 .. ipython:: python
 
@@ -484,7 +481,7 @@ row-wise or 1 for column-wise matching.
 
    values = {'ids': ['a', 'b'], 'vals': [1, 3]}
 
-   df.isin(values, axis=1)
+   df.isin(values)
 
 Where and Masking
 ~~~~~~~~~~~~~~~~~
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 702baa9550a00..331deccaf80e3 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5481,37 +5481,33 @@ def to_period(self, freq=None, axis=0, copy=True):
 
         return self._constructor(new_data)
 
-    def isin(self, values, axis=None):
+
+    def isin(self, values):
         """
-        Return boolean vector showing whether elements in the DataFrame are
-        exactly contained in the passed sequence of values.
+        Return boolean DataFrame showing whether each elements in the DataFrame is
+        contained in items.
 
         Parameters
         ----------
-        values : sequence (array-like) or dict of {label: sequence}.
-        axis : {None, 0, 1}
-            Compute isin row-wise (axis=0) or column-wise (axis=1)
-            Mandatory if values is a dict, ignored otherwise.
+        values : iterable or dictionary of columns to values
 
         Returns
         -------
 
-        bools : Series of booleans
+        DataFrame of booleans
         """
-        if not isinstance(values, dict):
-            return self.applymap(values.__contains__)
+        if isinstance(values, dict):
+            from collections import defaultdict
+            from pandas.tools.merge import concat
+            values = defaultdict(list, values)
+            return concat((self.iloc[:, [i]].isin(values[ind] or values[i])
+                             for i, ind in enumerate(self.columns)), axis=1)
 
         else:
-            from pandas.tools.merge import concat
-            if axis == 1:
-                return concat((self[col].isin(vals) for col, vals in
-                               values.iteritems()), axis=1)
-            elif axis == 0:
-                return concat((self.loc[row].isin(vals) for row, vals in
-                               values.iteritems()), axis=1).T
-            else:
-                raise TypeError('Axis must be "0" or "1" when values is a dict '
-                                'Got "%s" instead.' % str(axis))
+            return DataFrame(lib.ismember(self.values.ravel(),
+                                          set(values)).reshape(self.shape),
+                             self.index,
+                             self.columns)
 
     #----------------------------------------------------------------------
     # Deprecated stuff
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 07aa4fd13e1a1..916a38ae872d5 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -10639,50 +10639,34 @@ def test_isin(self):
                         'ids2': ['a', 'n', 'c', 'n']},
                         index=['foo', 'bar', 'baz', 'qux'])
         other = ['a', 'b', 'c']
-        result_none = df[['ids', 'ids2']].isin(other)
-        expected_none = DataFrame({'ids':  [True, True, False, False],
-                                  'ids2': [True, False, True, False]},
-                                  index=['foo', 'bar', 'baz', 'qux'])
 
-        assert_frame_equal(result_none, expected_none)
-
-        # axis = None
-        result_none_full = df.isin(other)
-        expected_none_full = DataFrame({'ids': [True, True, False, False],
-                                        'ids2': [True, False, True, False],
-                                        'vals': [False, False, False, False]},
-                                        index=['foo', 'bar', 'baz', 'qux'])
+        result = df.isin(other)
+        expected = DataFrame([df.loc[s].isin(other) for s in df.index])
+        assert_frame_equal(result, expected)
 
-        assert_frame_equal(result_none_full, expected_none_full)
+    def test_isin_empty(self):
+        df = DataFrame({'A': ['a', 'b', 'c'], 'B': ['a', 'e', 'f']})
+        result = df.isin([])  # empty values: no element can match
+        expected = DataFrame(False, df.index, df.columns)
+        assert_frame_equal(result, expected)
 
     def test_isin_dict(self):
-        df = DataFrame({'A': ['a', 'b', 'c', 'd'], 'B': [1, 2, 3, 4],
-                        'C': [1, 5, 7, 8]},
-                        index=['foo', 'bar', 'baz', 'qux'])
-        other = {'A': ('a', 'b'), 'B': (1, 3)}
-        result = df.isin(other, axis=1)
-        expected = DataFrame({'A': [True, True, False, False],
-                              'B': [True, False, True, False]},
-                              index=['foo', 'bar', 'baz', 'qux'])
-        assert_frame_equal(result, expected)
+        df = DataFrame({'A': ['a', 'b', 'c'], 'B': ['a', 'e', 'f']})
+        d = {'A': ['a']}
 
-    def test_isin_row(self):
-        df = DataFrame({'A': ['a', 'b', 'c', 'd'], 'B': [1, 2, 3, 4],
-                        'C': [1, 5, 7, 8]},
-                        index=['foo', 'bar', 'baz', 'qux'])
-        ind_other = {'foo': ['a', 1, 1],
-                     'bar': ['d', 2, 1],
-                     'baz': ['nn', 'nn', 'nn']}
+        expected = DataFrame(False, df.index, df.columns)
+        expected.loc[0, 'A'] = True
 
-        result_ind = df.isin(ind_other, axis=0)
-        expected_ind = DataFrame({'A': [True, False, False],
-                                  'B': [True, True, False],
-                                  'C': [True, False, False]},
-                            index=['foo', 'bar', 'baz']).reindex_like(result_ind)
+        result = df.isin(d)
+        assert_frame_equal(result, expected)
 
-        assert_frame_equal(result_ind, expected_ind)
+        # non unique columns
+        df.columns = ['A', 'A']
+        expected = DataFrame(False, df.index, df.columns)
+        expected.loc[0, 'A'] = True
+        result = df.isin(d)
+        assert_frame_equal(result, expected)
 
-        self.assertRaises(TypeError, df.isin, ind_other)
 
 if __name__ == '__main__':
     # unittest.main()

From 60b623fd1fb311d8e9cf2a2f97d7160e32656248 Mon Sep 17 00:00:00 2001
From: Andy Hayden <andyhayden1@gmail.com>
Date: Tue, 16 Jul 2013 14:15:06 +0100
Subject: [PATCH 3/3] ENH add iloc argument to DataFrame isin

---
 doc/source/indexing.rst    | 12 +++++++++---
 pandas/core/frame.py       | 17 ++++++++++++-----
 pandas/tests/test_frame.py | 15 +++++++++++++++
 3 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
index d3d6fe367a0bd..213a7ab659dae 100644
--- a/doc/source/indexing.rst
+++ b/doc/source/indexing.rst
@@ -476,13 +476,19 @@ a list of items you want to check for.
 
 .. ipython:: python
 
-   df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'],
-                   'ids2': ['a', 'n', 'c', 'n']})
-
    values = {'ids': ['a', 'b'], 'vals': [1, 3]}
 
    df.isin(values)
 
+To key the ``dict`` by integer column location instead, pass ``iloc=True``:
+
+.. ipython:: python
+
+   values = {0: ['a', 'b']}
+
+   df.isin(values, iloc=True)
+
+
 Where and Masking
 ~~~~~~~~~~~~~~~~~
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 331deccaf80e3..22dc27ff977d9 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5482,14 +5482,16 @@ def to_period(self, freq=None, axis=0, copy=True):
         return self._constructor(new_data)
 
 
-    def isin(self, values):
+    def isin(self, values, iloc=False):
         """
-        Return boolean DataFrame showing whether each elements in the DataFrame is
-        contained in items.
+        Return boolean DataFrame showing whether each element in the DataFrame is
+        contained in values.
 
         Parameters
         ----------
         values : iterable or dictionary of columns to values
+        iloc : boolean, default False
+            If values is a dict, treat its keys as column positions, not labels
 
         Returns
         -------
@@ -5500,8 +5502,13 @@ def isin(self, values):
             from collections import defaultdict
             from pandas.tools.merge import concat
             values = defaultdict(list, values)
-            return concat((self.iloc[:, [i]].isin(values[ind] or values[i])
-                             for i, ind in enumerate(self.columns)), axis=1)
+            # Dict keys are column positions when iloc is set, labels otherwise.
+            # Columns are sliced by position so duplicate labels stay separate;
+            # the defaultdict gives unlisted columns an empty list (all False).
+            keys = range(len(self.columns)) if iloc else self.columns
+            return concat((self.iloc[:, [i]].isin(values[key])
+                           for i, key in enumerate(keys)), axis=1)
+
 
         else:
             return DataFrame(lib.ismember(self.values.ravel(),
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 916a38ae872d5..577cbfe9dc744 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -10661,12 +10661,27 @@ def test_isin_dict(self):
         assert_frame_equal(result, expected)
 
         # non unique columns
+        df = DataFrame({'A': ['a', 'b', 'c'], 'B': ['a', 'e', 'f']})
         df.columns = ['A', 'A']
         expected = DataFrame(False, df.index, df.columns)
         expected.loc[0, 'A'] = True
         result = df.isin(d)
         assert_frame_equal(result, expected)
 
+        # integer dict keys: labels by default, positions with iloc=True
+        df = DataFrame({'A': ['a', 'b', 'c'], 'B': ['a', 'e', 'f']})
+        d = {0: ['a']}
+        expected = DataFrame(False, df.index, df.columns)
+
+        # without iloc, 0 is treated as a label; no column is named 0
+        result = df.isin(d)
+        assert_frame_equal(result, expected)
+
+        # with iloc, 0 selects the first column, where row 0 holds 'a'
+        result = df.isin(d, iloc=True)
+        expected.iloc[0, 0] = True
+        assert_frame_equal(result, expected)
+
 
 if __name__ == '__main__':
     # unittest.main()