Skip to content

ENH: Allow setitem in a frame where only mixed numerics are present #3050

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 15, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ pandas 0.11.0
strings that can be parsed with datetime.strptime
- Add ``axes`` property to ``Series`` for compatibility
- Add ``xs`` function to ``Series`` for compatibility
- Allow boolean setitem on a frame whose columns are all numeric but of mixed dtypes (e.g. int and float) (GH3037_)

**API Changes**

Expand Down Expand Up @@ -182,6 +183,7 @@ pandas 0.11.0
.. _GH3010: https://github.com/pydata/pandas/issues/3010
.. _GH3012: https://github.com/pydata/pandas/issues/3012
.. _GH3029: https://github.com/pydata/pandas/issues/3029
.. _GH3037: https://github.com/pydata/pandas/issues/3037
.. _GH3041: https://github.com/pydata/pandas/issues/3041


Expand Down
3 changes: 2 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2121,7 +2121,8 @@ def _setitem_frame(self, key, value):
raise ValueError('Must pass DataFrame with boolean values only')

if self._is_mixed_type:
raise ValueError('Cannot do boolean setting on mixed-type frame')
if not self._is_numeric_mixed_type:
raise ValueError('Cannot do boolean setting on mixed-type frame')

self.where(-key, value, inplace=True)

Expand Down
7 changes: 5 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,8 +671,11 @@ def consolidate(self, inplace=False):

@property
def _is_mixed_type(self):
# NOTE(review): this diff hunk is rendered without +/- markers. Going by the
# "5 additions & 2 deletions" summary for this file, the next two lines look
# like the removed pre-change body (consolidate, then report whether more
# than one block remains) -- confirm against the raw diff.
self._consolidate_inplace()
return len(self._data.blocks) > 1
# Presumably the post-change body: delegate to the is_mixed_type property of
# the underlying self._data object.
return self._data.is_mixed_type

@property
def _is_numeric_mixed_type(self):
# Thin delegation: report the is_numeric_mixed_type property of the
# underlying self._data object.
return self._data.is_numeric_mixed_type

def _reindex_axis(self, new_index, fill_method, axis, copy):
new_data = self._data.reindex_axis(new_index, axis=axis,
Expand Down
17 changes: 14 additions & 3 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,6 +980,16 @@ def _consolidate_check(self):
self._is_consolidated = len(dtypes) == len(set(dtypes))
self._known_consolidated = True

@property
def is_mixed_type(self):
    """
    Whether more than one block remains once the blocks are consolidated.

    Returns
    -------
    bool
        True if ``self.blocks`` holds more than one block after
        ``_consolidate_inplace`` has run, otherwise False.
    """
    # consolidate first so the block count is taken on the merged blocks
    self._consolidate_inplace()
    block_count = len(self.blocks)
    return block_count > 1

@property
def is_numeric_mixed_type(self):
    """
    Whether every block held by this manager reports itself as numeric.

    The blocks are consolidated in place before they are inspected.

    Returns
    -------
    bool
        True if ``is_numeric`` is truthy for every block in ``self.blocks``
        (vacuously True when there are no blocks), otherwise False.
    """
    self._consolidate_inplace()
    # A generator expression stops at the first non-numeric block and avoids
    # building a throwaway list just to feed ``all``.
    return all(block.is_numeric for block in self.blocks)

def get_numeric_data(self, copy=False, type_list=None, as_blocks = False):
"""
Parameters
Expand Down Expand Up @@ -1227,9 +1237,10 @@ def consolidate(self):
return BlockManager(new_blocks, self.axes)

def _consolidate_inplace(self):
# NOTE(review): this diff hunk is rendered without +/- markers. Going by the
# "14 additions & 3 deletions" summary for this file, the next three lines
# look like the removed pre-change body, which re-consolidated
# unconditionally on every call -- confirm against the raw diff.
self.blocks = _consolidate(self.blocks, self.items)
self._is_consolidated = True
self._known_consolidated = True
# Presumably the post-change body: the same three statements, now guarded so
# that they only run when is_consolidated() reports the blocks are not
# already consolidated.
if not self.is_consolidated():
self.blocks = _consolidate(self.blocks, self.items)
self._is_consolidated = True
self._known_consolidated = True

def get(self, item):
_, block = self._find_block(item)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def get_new_values(self):
dtype, fill_value = _maybe_promote(values.dtype)
new_values = np.empty(result_shape, dtype=dtype)
new_values.fill(fill_value)

new_mask = np.zeros(result_shape, dtype=bool)

# is there a simpler / faster way of doing this?
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8660,6 +8660,33 @@ def test_boolean_indexing(self):
df1[df1 > 2.0 * df2] = -1
assert_frame_equal(df1, expected)

def test_boolean_indexing_mixed(self):
# Boolean-mask assignment on a frame whose columns mix float and integer
# values. Columns 0-4 hold NaN/float values; column 'y' holds integers.
df = DataFrame(
{0L: {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan},
1L: {35: np.nan,
40: 0.32632316859446198,
43: np.nan,
49: 0.32632316859446198,
50: 0.39114724480578139},
2L: {35: np.nan, 40: np.nan, 43: 0.29012581014105987, 49: np.nan, 50: np.nan},
3L: {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan},
4L: {35: 0.34215328467153283, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan},
'y': {35: 0, 40: 0, 43: 0, 49: 0, 50: 1}})

# mixed int/float ok: every cell greater than 0.3 is overwritten with 1
df2 = df.copy()
df2[df2>0.3] = 1
# Build the expected frame by hand: only four float cells exceed 0.3
# (column 2's 0.29 does not). The 1 already in 'y' at row 50 also matches
# the mask but is rewritten to the same value, so it needs no entry here.
expected = df.copy()
expected.loc[40,1] = 1
expected.loc[49,1] = 1
expected.loc[50,1] = 1
expected.loc[35,4] = 1
assert_frame_equal(df2,expected)

# Adding a string column means the frame is no longer numeric-only; the test
# pins boolean setitem raising ValueError in that case.
# NOTE(review): the original comment left "should this raise?" as an open question.
df['foo'] = 'test'
self.assertRaises(ValueError, df.__setitem__, df>0.3, 1)

def test_sum_bools(self):
df = DataFrame(index=range(1), columns=range(10))
bools = isnull(df)
Expand Down