From 804eaaedc86a1e5989ae7cc3476b729ae9ad8cf4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 11 Nov 2012 16:09:46 -0500 Subject: [PATCH 1/5] In core/frame.py: changed method __getitem__ to use .mask directly (i.e. df.mask(df > 0) is semantically equivalent to df[df > 0]); added an inplace keyword to the where method (to update the DataFrame in place; the default is NOT to operate in place, and to return a new DataFrame); changed method _boolean_set to use where with inplace=True (this allows alignment of the passed values and is slightly less strict than the previous implementation). All tests pass (including an added test for boolean frame indexing). --- pandas/core/frame.py | 19 +++++++++---------- pandas/tests/test_frame.py | 6 ++++++ 2 files changed, 15 insertions(+), 10 deletions(-) mode change 100644 => 100755 pandas/core/frame.py mode change 100644 => 100755 pandas/tests/test_frame.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py old mode 100644 new mode 100755 index 31c1a09f409c3..cada5f23cd87c --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1775,9 +1775,8 @@ def __getitem__(self, key): elif isinstance(self.columns, MultiIndex): return self._getitem_multilevel(key) elif isinstance(key, DataFrame): - values = key.values - if values.dtype == bool: - return self.values[values] + if key.values.dtype == bool: + return self.mask(key) else: raise ValueError('Cannot index using non-boolean DataFrame') else: @@ -1891,11 +1890,7 @@ def _boolean_set(self, key, value): if self._is_mixed_type: raise ValueError('Cannot do boolean setting on mixed-type frame') - if isinstance(value, DataFrame): - assert(value._indexed_same(self)) - np.putmask(self.values, mask, value.values) - else: - self.values[mask] = value + self.where(key, value, inplace=True) def _set_item_multiple(self, keys, value): if isinstance(value, DataFrame): @@ -4878,7 +4873,7 @@ def combineMult(self, other): """ return self.mul(other, fill_value=1.) 
- def where(self, cond, other): + def where(self, cond, other, inplace=False): """ Return a DataFrame with the same shape as self and whose corresponding entries are from self where cond is True and otherwise are from other. @@ -4905,9 +4900,13 @@ def where(self, cond, other): if isinstance(other, DataFrame): _, other = self.align(other, join='left', fill_value=NA) + if inplace: + np.putmask(self.values, cond, other) + return self + rs = np.where(cond, self, other) return self._constructor(rs, self.index, self.columns) - + def mask(self, cond): """ Returns copy of self whose values are replaced with nan if the diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py old mode 100644 new mode 100755 index 0b36e8d39a00a..69b744a84beb9 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -141,6 +141,12 @@ def test_getitem_boolean(self): self.assertRaises(ValueError, self.tsframe.__getitem__, self.tsframe) + # test df[df >0] works + bif = self.tsframe[self.tsframe > 0] + bifw = DataFrame(np.where(self.tsframe>0,self.tsframe,np.nan),index=self.tsframe.index,columns=self.tsframe.columns) + self.assert_(isinstance(bif,DataFrame)) + self.assert_(bif.shape == self.tsframe.shape) + assert_frame_equal(bif,bifw) def test_getitem_boolean_list(self): df = DataFrame(np.arange(12).reshape(3,4)) From 540fafd32f1a4a93c79ab3cec190c47e8ebcfcd5 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 11 Nov 2012 16:44:27 -0500 Subject: [PATCH 2/5] relaxed __setitem__ restriction on boolean indexing a frame on an equal sized frame thus we now allow: df[df[:-1]<0] = 2 (essentially partial boolean indexing) all tests continue to pass (added new test to test partial boolean indexing, removed test requiring an equal indexed frame) --- pandas/core/frame.py | 5 ----- pandas/tests/test_frame.py | 6 +++++- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cada5f23cd87c..b83961309253a 100755 --- 
a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1870,11 +1870,6 @@ def __setitem__(self, key, value): # support boolean setting with DataFrame input, e.g. # df[df > df2] = 0 if isinstance(key, DataFrame): - if not (key.index.equals(self.index) and - key.columns.equals(self.columns)): - raise PandasError('Can only index with like-indexed ' - 'DataFrame objects') - self._boolean_set(key, value) elif isinstance(key, (np.ndarray, list)): return self._set_item_multiple(key, value) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 69b744a84beb9..824b668bed977 100755 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -284,7 +284,11 @@ def test_setitem_boolean(self): values[values == 5] = 0 assert_almost_equal(df.values, values) - self.assertRaises(Exception, df.__setitem__, df[:-1] > 0, 2) + # a df that needs alignment first + df[df[:-1]<0] = 2 + np.putmask(values[:-1],values[:-1]<0,2) + assert_almost_equal(df.values, values) + self.assertRaises(Exception, df.__setitem__, df * 0, 2) # index with DataFrame From 030bc669b0a338deb35773612472a76dbf2ae9dd Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 11 Nov 2012 17:31:33 -0500 Subject: [PATCH 3/5] fixed file modes for core/frame.py, test/test_frame.py --- pandas/core/frame.py | 3 +-- pandas/tests/test_frame.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b83961309253a..5e41bc6ec9481 100755 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1878,8 +1878,7 @@ def __setitem__(self, key, value): self._set_item(key, value) def _boolean_set(self, key, value): - mask = key.values - if mask.dtype != np.bool_: + if key.values.dtype != np.bool_: raise ValueError('Must pass DataFrame with boolean values only') if self._is_mixed_type: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 824b668bed977..ef4e557b8e3a8 100755 --- a/pandas/tests/test_frame.py +++ 
b/pandas/tests/test_frame.py @@ -142,6 +142,7 @@ def test_getitem_boolean(self): self.assertRaises(ValueError, self.tsframe.__getitem__, self.tsframe) # test df[df >0] works + bif = self.tsframe[self.tsframe > 0] bifw = DataFrame(np.where(self.tsframe>0,self.tsframe,np.nan),index=self.tsframe.index,columns=self.tsframe.columns) self.assert_(isinstance(bif,DataFrame)) From 8034116f41d642b8c16232ca0c971ae110072e42 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 13 Nov 2012 12:44:01 -0500 Subject: [PATCH 4/5] In core/frame.py: removed the mask method; made other an optional keyword parameter in where; changed __getitem__ to use where (rather than mask) --- pandas/core/frame.py | 19 ++----------------- pandas/tests/test_frame.py | 3 --- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5e41bc6ec9481..fe7ca6fa5c9b1 100755 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1776,7 +1776,7 @@ def __getitem__(self, key): return self._getitem_multilevel(key) elif isinstance(key, DataFrame): if key.values.dtype == bool: - return self.mask(key) + return self.where(key) else: raise ValueError('Cannot index using non-boolean DataFrame') else: @@ -4867,7 +4867,7 @@ def combineMult(self, other): """ return self.mul(other, fill_value=1.) - def where(self, cond, other, inplace=False): + def where(self, cond, other=NA, inplace=False): """ Return a DataFrame with the same shape as self and whose corresponding entries are from self where cond is True and otherwise are from other. 
@@ -4901,21 +4901,6 @@ def where(self, cond, other, inplace=False): rs = np.where(cond, self, other) return self._constructor(rs, self.index, self.columns) - def mask(self, cond): - """ - Returns copy of self whose values are replaced with nan if the - corresponding entry in cond is False - - Parameters - ---------- - cond: boolean DataFrame or array - - Returns - ------- - wh: DataFrame - """ - return self.where(cond, NA) - _EMPTY_SERIES = Series([]) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index ef4e557b8e3a8..aec7ddffd84e4 100755 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -142,7 +142,6 @@ def test_getitem_boolean(self): self.assertRaises(ValueError, self.tsframe.__getitem__, self.tsframe) # test df[df >0] works - bif = self.tsframe[self.tsframe > 0] bifw = DataFrame(np.where(self.tsframe>0,self.tsframe,np.nan),index=self.tsframe.index,columns=self.tsframe.columns) self.assert_(isinstance(bif,DataFrame)) @@ -5215,8 +5214,6 @@ def test_where(self): for k, v in rs.iteritems(): assert_series_equal(v, np.where(cond[k], df[k], other5)) - assert_frame_equal(rs, df.mask(cond)) - err1 = (df + 1).values[0:2, :] self.assertRaises(ValueError, df.where, cond, err1) From a4143469a961ccc9740a292c84b27c00d9674425 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 13 Nov 2012 12:56:09 -0500 Subject: [PATCH 5/5] added back mask method that does condition inversion added condition testing to where that raised ValueError on an invalid condition (e.g. 
not an ndarray like object) added tests for same --- pandas/core/frame.py | 18 ++++++++++++++++++ pandas/tests/test_frame.py | 12 ++++++++++++ 2 files changed, 30 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fe7ca6fa5c9b1..c9184f148e5a9 100755 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4882,6 +4882,9 @@ def where(self, cond, other=NA, inplace=False): ------- wh: DataFrame """ + if not hasattr(cond,'shape'): + raise ValueError('where requires an ndarray like object for its condition') + if isinstance(cond, np.ndarray): if cond.shape != self.shape: raise ValueError('Array onditional must be same shape as self') @@ -4901,6 +4904,21 @@ def where(self, cond, other=NA, inplace=False): rs = np.where(cond, self, other) return self._constructor(rs, self.index, self.columns) + def mask(self, cond): + """ + Returns copy of self whose values are replaced with nan if the + inverted condition is True + + Parameters + ---------- + cond: boolean DataFrame or array + + Returns + ------- + wh: DataFrame + """ + return self.where(~cond, NA) + _EMPTY_SERIES = Series([]) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index aec7ddffd84e4..dcc7bcb909cd4 100755 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -5220,6 +5220,18 @@ def test_where(self): err2 = cond.ix[:2, :].values self.assertRaises(ValueError, df.where, err2, other1) + # invalid conditions + self.assertRaises(ValueError, df.mask, True) + self.assertRaises(ValueError, df.mask, 0) + + def test_mask(self): + df = DataFrame(np.random.randn(5, 3)) + cond = df > 0 + + rs = df.where(cond, np.nan) + assert_frame_equal(rs, df.mask(df <= 0)) + assert_frame_equal(rs, df.mask(~cond)) + #---------------------------------------------------------------------- # Transposing