Skip to content

Commit 0309899

Browse files
author
TomAugspurger
committed
ENH: Add isin method to DataFrame.
docs. to be rebased ENH: Add isin method to DataFrame Basic tests. Added method and fixed tests. ENH: Add ordered argument to df.isin() Expects a sequence of arrays. Updated release notes for df.isin() CLN: cleanup going to remove ordered argument. Using a dict for ordered matching. Docs BUG: fixed subselection length check issues. Updated release notes for df.isin() remove merge conflict note
1 parent 1e69dad commit 0309899

File tree

4 files changed

+113
-0
lines changed

4 files changed

+113
-0
lines changed

doc/source/indexing.rst

+30
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,36 @@ and :ref:`Advanced Indexing <indexing.advanced>` you may select along more than
456456
457457
df2.loc[criterion & (df2['b'] == 'x'),'b':'c']
458458
459+
*New in 0.12.0*
460+
461+
DataFrame also has an ``isin`` method. When calling ``isin``, pass a set of
462+
values as either an array or dict. If values is just an array, ``isin`` returns
463+
a DataFrame of booleans that is the same shape as the original DataFrame, with ``True``
464+
wherever the element is in the sequence of values.
465+
466+
.. ipython:: python
467+
468+
df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'],
469+
'ids2': ['a', 'n', 'c', 'n']})
470+
471+
values = ['a', 'b', 1, 3]
472+
473+
df.isin(values)
474+
475+
Oftentimes you'll want to match certain values with certain columns or rows.
476+
Just make ``values`` a ``dict`` where each key is a row or column label and the value is
477+
a list of items you want to check for. Make sure to set axis equal to 0 for
478+
row-wise or 1 for column-wise matching.
479+
480+
.. ipython:: python
481+
482+
df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'],
483+
'ids2': ['a', 'n', 'c', 'n']})
484+
485+
values = {'ids': ['a', 'b'], 'vals': [1, 3]}
486+
487+
df.isin(values, axis=1)
488+
459489
Where and Masking
460490
~~~~~~~~~~~~~~~~~
461491

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ pandas 0.12
5454
- Access to historical Google Finance data in pandas.io.data (:issue:`3814`)
5555
- DataFrame plotting methods can sample column colors from a Matplotlib
5656
colormap via the ``colormap`` keyword. (:issue:`3860`)
57+
- Added ``isin`` method to DataFrame (:issue:`4211`)
5758

5859
**Improvements to existing features**
5960

pandas/core/frame.py

+32
Original file line numberDiff line numberDiff line change
@@ -5481,6 +5481,38 @@ def to_period(self, freq=None, axis=0, copy=True):
54815481

54825482
return self._constructor(new_data)
54835483

5484+
def isin(self, values, axis=None):
    """
    Return a boolean DataFrame showing whether elements in the DataFrame
    are exactly contained in the passed values.

    Parameters
    ----------
    values : sequence (array-like) or dict of {label: sequence}.
        If a sequence, every element of the frame is tested against it.
        If a dict, each key selects a column (axis=1) or a row (axis=0)
        and only that selection is tested against the associated
        sequence.
    axis : {None, 0, 1}
        Compute isin row-wise (axis=0) or column-wise (axis=1)
        Mandatory if values is a dict, ignored otherwise.

    Returns
    -------
    bools : DataFrame of booleans
        Element-wise result when values is a sequence; restricted to the
        labels named in values when it is a dict.

    Raises
    ------
    TypeError
        If values is a dict and axis is neither 0 nor 1.
    """
    if not isinstance(values, dict):
        # Plain membership test applied to every element of the frame.
        return self.applymap(values.__contains__)

    # Validate axis up front so a bad call fails before any work is done.
    if axis not in (0, 1):
        raise TypeError('Axis must be "0" or "1" when values is a dict. '
                        'Got "%s" instead.' % str(axis))

    from pandas.tools.merge import concat

    # ``items()`` (rather than ``iteritems()``) works on Python 2 and 3.
    if axis == 1:
        pieces = (self[col].isin(vals) for col, vals in values.items())
        return concat(pieces, axis=1)

    # Each selected row comes back as a Series; glue them together as
    # columns and transpose so the row labels end up on the index again.
    pieces = (self.loc[row].isin(vals) for row, vals in values.items())
    return concat(pieces, axis=1).T
5515+
54845516
#----------------------------------------------------------------------
54855517
# Deprecated stuff
54865518

pandas/tests/test_frame.py

+50
Original file line numberDiff line numberDiff line change
@@ -10633,6 +10633,56 @@ def _check_f(base, f):
1063310633
f = lambda x: x.rename({1: 'foo'}, inplace=True)
1063410634
_check_f(data.copy()['c'], f)
1063510635

10636+
def test_isin(self):
    # GH #4211
    # List-like ``values``: membership is checked element by element and
    # ``axis`` stays at its default of None for both cases below.
    labels = ['foo', 'bar', 'baz', 'qux']
    frame = DataFrame({'vals': [1, 2, 3, 4],
                       'ids': ['a', 'b', 'f', 'n'],
                       'ids2': ['a', 'n', 'c', 'n']},
                      index=labels)
    candidates = ['a', 'b', 'c']

    # Only the two string columns.
    subset_result = frame[['ids', 'ids2']].isin(candidates)
    subset_expected = DataFrame({'ids': [True, True, False, False],
                                 'ids2': [True, False, True, False]},
                                index=labels)
    assert_frame_equal(subset_result, subset_expected)

    # The whole frame: the integer column never matches a string.
    full_result = frame.isin(candidates)
    full_expected = DataFrame({'ids': [True, True, False, False],
                               'ids2': [True, False, True, False],
                               'vals': [False, False, False, False]},
                              index=labels)
    assert_frame_equal(full_result, full_expected)
10657+
10658+
def test_isin_dict(self):
    # Dict ``values`` with axis=1: each key names a column and only the
    # named columns show up in the result.
    labels = ['foo', 'bar', 'baz', 'qux']
    frame = DataFrame({'A': ['a', 'b', 'c', 'd'],
                       'B': [1, 2, 3, 4],
                       'C': [1, 5, 7, 8]},
                      index=labels)
    per_column = {'A': ('a', 'b'), 'B': (1, 3)}

    matched = frame.isin(per_column, axis=1)

    wanted = DataFrame({'A': [True, True, False, False],
                        'B': [True, False, True, False]},
                       index=labels)
    assert_frame_equal(matched, wanted)
10668+
10669+
def test_isin_row(self):
    # Dict ``values`` with axis=0: each key names a row label.
    frame = DataFrame({'A': ['a', 'b', 'c', 'd'],
                       'B': [1, 2, 3, 4],
                       'C': [1, 5, 7, 8]},
                      index=['foo', 'bar', 'baz', 'qux'])
    per_row = {'foo': ['a', 1, 1],
               'bar': ['d', 2, 1],
               'baz': ['nn', 'nn', 'nn']}

    matched = frame.isin(per_row, axis=0)

    # Align the expectation to whatever row order the result came back in.
    wanted = DataFrame({'A': [True, False, False],
                        'B': [True, True, False],
                        'C': [True, False, False]},
                       index=['foo', 'bar', 'baz'])
    wanted = wanted.reindex_like(matched)
    assert_frame_equal(matched, wanted)

    # A dict without an explicit axis must be rejected.
    self.assertRaises(TypeError, frame.isin, per_row)
1063610686

1063710687
if __name__ == '__main__':
1063810688
# unittest.main()

0 commit comments

Comments
 (0)