Skip to content

ENH: DataFrame isin #4237

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 24, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions doc/source/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,36 @@ and :ref:`Advanced Indexing <indexing.advanced>` you may select along more than

df2.loc[criterion & (df2['b'] == 'x'),'b':'c']

*New in 0.12.0*

DataFrame also has an ``isin`` method. When calling ``isin``, pass the values to
match as either an array-like or a ``dict``. If values is an array-like, ``isin``
returns a DataFrame of booleans that is the same shape as the original DataFrame,
with ``True`` wherever the element is in the sequence of values.

.. ipython:: python

df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'],
'ids2': ['a', 'n', 'c', 'n']})

values = ['a', 'b', 1, 3]

df.isin(values)

Oftentimes you'll want to match certain values with certain columns or rows.
Just make values a ``dict`` where each key is a row or column label, and each
value is a list of items you want to check for in that row or column. When values
is a ``dict`` you must set ``axis``: 0 to match the keys against row labels, or 1
to match them against column labels.

.. ipython:: python

df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'],
'ids2': ['a', 'n', 'c', 'n']})

values = {'ids': ['a', 'b'], 'vals': [1, 3]}

df.isin(values, axis=1)

Where and Masking
~~~~~~~~~~~~~~~~~

Expand Down
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ pandas 0.12
- Access to historical Google Finance data in pandas.io.data (:issue:`3814`)
- DataFrame plotting methods can sample column colors from a Matplotlib
colormap via the ``colormap`` keyword. (:issue:`3860`)
- Added ``isin`` method to DataFrame (:issue:`4211`)

**Improvements to existing features**

Expand Down
32 changes: 32 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5481,6 +5481,38 @@ def to_period(self, freq=None, axis=0, copy=True):

return self._constructor(new_data)

def isin(self, values, axis=None):
    """
    Return boolean DataFrame showing whether elements in the DataFrame are
    exactly contained in the passed sequence of values.

    Parameters
    ----------
    values : sequence (array-like) or dict of {label: sequence}.
        If a dict, each key is a row label (axis=0) or column label
        (axis=1) and each value is the sequence to test that row or
        column against.
    axis : {None, 0, 1}
        Compute isin row-wise (axis=0) or column-wise (axis=1)
        Mandatory if values is a dict, ignored otherwise.

    Returns
    -------
    bools : DataFrame of booleans
        Same shape as the caller when values is not a dict. When values
        is a dict, only the rows (axis=0) or columns (axis=1) named by
        its keys are present in the result.

    Raises
    ------
    TypeError
        If values is a dict and axis is neither 0 nor 1.
    """
    if not isinstance(values, dict):
        # Element-wise membership test against the passed container.
        return self.applymap(values.__contains__)

    # Validate up front so a bad axis fails before any other work is done.
    if not (axis == 0 or axis == 1):
        raise TypeError('Axis must be "0" or "1" when values is a dict. '
                        'Got "%s" instead.' % str(axis))

    from pandas.tools.merge import concat

    # ``items`` (rather than ``iteritems``) works on both Python 2 and 3.
    if axis == 1:
        return concat((self[col].isin(vals)
                       for col, vals in values.items()), axis=1)

    # axis == 0: each selected row is a Series indexed by column label, so
    # concatenate the per-row results as columns and transpose back.
    return concat((self.loc[row].isin(vals)
                   for row, vals in values.items()), axis=1).T

#----------------------------------------------------------------------
# Deprecated stuff

Expand Down
50 changes: 50 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -10633,6 +10633,56 @@ def _check_f(base, f):
f = lambda x: x.rename({1: 'foo'}, inplace=True)
_check_f(data.copy()['c'], f)

def test_isin(self):
    """isin with a plain list marks matching cells across the frame."""
    # GH #4211
    labels = ['foo', 'bar', 'baz', 'qux']
    frame = DataFrame({'vals': [1, 2, 3, 4],
                       'ids': ['a', 'b', 'f', 'n'],
                       'ids2': ['a', 'n', 'c', 'n']},
                      index=labels)
    candidates = ['a', 'b', 'c']

    # Only the two string columns.
    expected_subset = DataFrame({'ids': [True, True, False, False],
                                 'ids2': [True, False, True, False]},
                                index=labels)
    assert_frame_equal(frame[['ids', 'ids2']].isin(candidates),
                       expected_subset)

    # axis = None on the whole frame: the integer column never matches.
    expected_full = DataFrame({'ids': [True, True, False, False],
                               'ids2': [True, False, True, False],
                               'vals': [False, False, False, False]},
                              index=labels)
    assert_frame_equal(frame.isin(candidates), expected_full)

def test_isin_dict(self):
    """isin with a dict and axis=1 checks each named column separately."""
    labels = ['foo', 'bar', 'baz', 'qux']
    frame = DataFrame({'A': ['a', 'b', 'c', 'd'],
                       'B': [1, 2, 3, 4],
                       'C': [1, 5, 7, 8]},
                      index=labels)
    per_column = {'A': ('a', 'b'), 'B': (1, 3)}

    # Column 'C' is not a key of the dict, so it is absent from the result.
    wanted = DataFrame({'A': [True, True, False, False],
                        'B': [True, False, True, False]},
                       index=labels)

    got = frame.isin(per_column, axis=1)
    assert_frame_equal(got, wanted)

def test_isin_row(self):
    """isin with a dict and axis=0 checks each named row separately."""
    frame = DataFrame({'A': ['a', 'b', 'c', 'd'],
                       'B': [1, 2, 3, 4],
                       'C': [1, 5, 7, 8]},
                      index=['foo', 'bar', 'baz', 'qux'])
    per_row = {'foo': ['a', 1, 1],
               'bar': ['d', 2, 1],
               'baz': ['nn', 'nn', 'nn']}

    got = frame.isin(per_row, axis=0)

    # Row 'qux' is not a key of the dict, so it is absent from the result.
    wanted = DataFrame({'A': [True, False, False],
                        'B': [True, True, False],
                        'C': [True, False, False]},
                       index=['foo', 'bar', 'baz'])
    # Align the expectation to the result's label order before comparing.
    wanted = wanted.reindex_like(got)

    assert_frame_equal(got, wanted)

    # Dict values without an explicit axis must be rejected.
    self.assertRaises(TypeError, frame.isin, per_row)

if __name__ == '__main__':
# unittest.main()
Expand Down