diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 31c1a09f409c3..c9184f148e5a9 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1775,9 +1775,8 @@ def __getitem__(self, key):
         elif isinstance(self.columns, MultiIndex):
             return self._getitem_multilevel(key)
         elif isinstance(key, DataFrame):
-            values = key.values
-            if values.dtype == bool:
-                return self.values[values]
+            if key.values.dtype == bool:
+                return self.where(key)
             else:
                 raise ValueError('Cannot index using non-boolean DataFrame')
         else:
@@ -1871,11 +1870,6 @@ def __setitem__(self, key, value):
         # support boolean setting with DataFrame input, e.g.
         # df[df > df2] = 0
         if isinstance(key, DataFrame):
-            if not (key.index.equals(self.index) and
-                    key.columns.equals(self.columns)):
-                raise PandasError('Can only index with like-indexed '
-                                  'DataFrame objects')
-
             self._boolean_set(key, value)
         elif isinstance(key, (np.ndarray, list)):
             return self._set_item_multiple(key, value)
@@ -1884,18 +1878,13 @@ def __setitem__(self, key, value):
             self._set_item(key, value)
 
     def _boolean_set(self, key, value):
-        mask = key.values
-        if mask.dtype != np.bool_:
+        if key.values.dtype != np.bool_:
             raise ValueError('Must pass DataFrame with boolean values only')
 
         if self._is_mixed_type:
             raise ValueError('Cannot do boolean setting on mixed-type frame')
 
-        if isinstance(value, DataFrame):
-            assert(value._indexed_same(self))
-            np.putmask(self.values, mask, value.values)
-        else:
-            self.values[mask] = value
+        self.where(key, value, inplace=True)
 
     def _set_item_multiple(self, keys, value):
         if isinstance(value, DataFrame):
@@ -4878,7 +4867,7 @@ def combineMult(self, other):
         """
         return self.mul(other, fill_value=1.)
 
-    def where(self, cond, other):
+    def where(self, cond, other=NA, inplace=False):
         """
         Return a DataFrame with the same shape as self and whose corresponding
         entries are from self where cond is True and otherwise are from other.
@@ -4893,6 +4882,10 @@ def where(self, cond, other):
         -------
         wh: DataFrame
         """
+        if not hasattr(cond, 'shape'):
+            raise ValueError('where requires an ndarray like object for its '
+                             'condition')
+
         if isinstance(cond, np.ndarray):
             if cond.shape != self.shape:
                 raise ValueError('Array onditional must be same shape as self')
@@ -4905,13 +4898,20 @@ def where(self, cond, other):
         if isinstance(other, DataFrame):
             _, other = self.align(other, join='left', fill_value=NA)
 
+        if inplace:
+            # NOTE(review): np.putmask writes `other` where cond is True,
+            # the opposite of the np.where result below (which keeps self
+            # there); _boolean_set relies on this for df[mask] = value.
+            np.putmask(self.values, cond, other)
+            return self
+
         rs = np.where(cond, self, other)
         return self._constructor(rs, self.index, self.columns)
 
     def mask(self, cond):
         """
         Returns copy of self whose values are replaced with nan if the
-        corresponding entry in cond is False
+        corresponding entry in cond is True
 
         Parameters
         ----------
@@ -4921,7 +4921,7 @@ def mask(self, cond):
         -------
         wh: DataFrame
         """
-        return self.where(cond, NA)
+        return self.where(~cond, NA)
 
 _EMPTY_SERIES = Series([])
 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 0b36e8d39a00a..dcc7bcb909cd4 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -141,6 +141,15 @@ def test_getitem_boolean(self):
 
         self.assertRaises(ValueError, self.tsframe.__getitem__, self.tsframe)
 
+        # test df[df > 0] works
+        bif = self.tsframe[self.tsframe > 0]
+        bifw = DataFrame(np.where(self.tsframe > 0, self.tsframe, np.nan),
+                         index=self.tsframe.index,
+                         columns=self.tsframe.columns)
+        self.assert_(isinstance(bif, DataFrame))
+        self.assert_(bif.shape == self.tsframe.shape)
+        assert_frame_equal(bif, bifw)
+
     def test_getitem_boolean_list(self):
         df = DataFrame(np.arange(12).reshape(3,4))
 
@@ -278,7 +287,11 @@ def test_setitem_boolean(self):
         values[values == 5] = 0
         assert_almost_equal(df.values, values)
 
-        self.assertRaises(Exception, df.__setitem__, df[:-1] > 0, 2)
+        # a df that needs alignment first
+        df[df[:-1] < 0] = 2
+        np.putmask(values[:-1], values[:-1] < 0, 2)
+        assert_almost_equal(df.values, values)
+
         self.assertRaises(Exception, df.__setitem__, df * 0, 2)
 
         # index with DataFrame
@@ -5204,14 +5217,24 @@ def test_where(self):
         for k, v in rs.iteritems():
             assert_series_equal(v, np.where(cond[k], df[k], other5))
 
-        assert_frame_equal(rs, df.mask(cond))
-
         err1 = (df + 1).values[0:2, :]
         self.assertRaises(ValueError, df.where, cond, err1)
 
         err2 = cond.ix[:2, :].values
         self.assertRaises(ValueError, df.where, err2, other1)
 
+        # invalid conditions
+        self.assertRaises(ValueError, df.where, True)
+        self.assertRaises(ValueError, df.where, 0)
+
+    def test_mask(self):
+        df = DataFrame(np.random.randn(5, 3))
+        cond = df > 0
+
+        rs = df.where(cond, np.nan)
+        assert_frame_equal(rs, df.mask(df <= 0))
+        assert_frame_equal(rs, df.mask(~cond))
+
     #----------------------------------------------------------------------
     # Transposing
 