diff --git a/RELEASE.rst b/RELEASE.rst index 2eb7980458f8e..032bca22d6f24 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -62,6 +62,7 @@ pandas 0.11.0 strings that can be parsed with datetime.strptime - Add ``axes`` property to ``Series`` for compatibility - Add ``xs`` function to ``Series`` for compatibility + - Allow boolean setitem on a frame whose columns are all numeric but of mixed dtypes (e.g. int and float) (GH3037_) **API Changes** @@ -182,6 +183,7 @@ pandas 0.11.0 .. _GH3010: https://github.com/pydata/pandas/issues/3010 .. _GH3012: https://github.com/pydata/pandas/issues/3012 .. _GH3029: https://github.com/pydata/pandas/issues/3029 +.. _GH3037: https://github.com/pydata/pandas/issues/3037 .. _GH3041: https://github.com/pydata/pandas/issues/3041 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ee586a2101f62..08db4e11b8ee1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2121,7 +2121,8 @@ def _setitem_frame(self, key, value): raise ValueError('Must pass DataFrame with boolean values only') if self._is_mixed_type: - raise ValueError('Cannot do boolean setting on mixed-type frame') + if not self._is_numeric_mixed_type: + raise ValueError('Cannot do boolean setting on mixed-type frame') self.where(-key, value, inplace=True) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 093f600f8c4ea..2c5c8c4d088be 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -671,8 +671,11 @@ def consolidate(self, inplace=False): @property def _is_mixed_type(self): - self._consolidate_inplace() - return len(self._data.blocks) > 1 + return self._data.is_mixed_type + + @property + def _is_numeric_mixed_type(self): + return self._data.is_numeric_mixed_type def _reindex_axis(self, new_index, fill_method, axis, copy): new_data = self._data.reindex_axis(new_index, axis=axis, diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 2a41bbffa3b83..d2c3f4104950b 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ 
-980,6 +980,16 @@ def _consolidate_check(self): self._is_consolidated = len(dtypes) == len(set(dtypes)) self._known_consolidated = True + @property + def is_mixed_type(self): + self._consolidate_inplace() + return len(self.blocks) > 1 + + @property + def is_numeric_mixed_type(self): + self._consolidate_inplace() + return all([ block.is_numeric for block in self.blocks ]) + def get_numeric_data(self, copy=False, type_list=None, as_blocks = False): """ Parameters @@ -1227,9 +1237,10 @@ def consolidate(self): return BlockManager(new_blocks, self.axes) def _consolidate_inplace(self): - self.blocks = _consolidate(self.blocks, self.items) - self._is_consolidated = True - self._known_consolidated = True + if not self.is_consolidated(): + self.blocks = _consolidate(self.blocks, self.items) + self._is_consolidated = True + self._known_consolidated = True def get(self, item): _, block = self._find_block(item) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 4598b37d7da6a..2e7ec3ad9c280 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -159,7 +159,7 @@ def get_new_values(self): dtype, fill_value = _maybe_promote(values.dtype) new_values = np.empty(result_shape, dtype=dtype) new_values.fill(fill_value) - + new_mask = np.zeros(result_shape, dtype=bool) # is there a simpler / faster way of doing this? 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 1c30dfd1abced..4f7472fa46dbe 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -8660,6 +8660,33 @@ def test_boolean_indexing(self): df1[df1 > 2.0 * df2] = -1 assert_frame_equal(df1, expected) + def test_boolean_indexing_mixed(self): + df = DataFrame( + {0L: {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, + 1L: {35: np.nan, + 40: 0.32632316859446198, + 43: np.nan, + 49: 0.32632316859446198, + 50: 0.39114724480578139}, + 2L: {35: np.nan, 40: np.nan, 43: 0.29012581014105987, 49: np.nan, 50: np.nan}, + 3L: {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, + 4L: {35: 0.34215328467153283, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, + 'y': {35: 0, 40: 0, 43: 0, 49: 0, 50: 1}}) + + # mixed int/float ok + df2 = df.copy() + df2[df2>0.3] = 1 + expected = df.copy() + expected.loc[40,1] = 1 + expected.loc[49,1] = 1 + expected.loc[50,1] = 1 + expected.loc[35,4] = 1 + assert_frame_equal(df2,expected) + + # add object, should this raise? + df['foo'] = 'test' + self.assertRaises(ValueError, df.__setitem__, df>0.3, 1) + def test_sum_bools(self): df = DataFrame(index=range(1), columns=range(10)) bools = isnull(df)