From 06e280843f9053677371534e6eff6eac8f11ee39 Mon Sep 17 00:00:00 2001 From: jreback Date: Thu, 3 Jul 2014 10:45:49 -0400 Subject: [PATCH] API: disallow inplace setting with where and a non-np.nan value (GH7656) --- doc/source/v0.14.1.txt | 7 ++----- pandas/core/frame.py | 10 +++------- pandas/core/generic.py | 20 ++++++++++++++++++++ pandas/tests/test_frame.py | 6 ++++++ 4 files changed, 31 insertions(+), 12 deletions(-) diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index 850e7e13db2ff..8ede5f32dded6 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -24,11 +24,6 @@ users upgrade to this version. API changes ~~~~~~~~~~~ - - - - - - All ``offsets`` suppports ``normalize`` keyword to specify whether ``offsets.apply``, ``rollforward`` and ``rollback`` resets time (hour, minute, etc) or not (default ``False``, preserves time) (:issue:`7156`) @@ -60,6 +55,8 @@ API changes - Bug in ``.loc`` performing fallback integer indexing with ``object`` dtype indices (:issue:`7496`) - Add back ``#N/A N/A`` as a default NA value in text parsing, (regresion from 0.12) (:issue:`5521`) +- Raise a ``TypeError`` on inplace-setting with a ``.where`` and a non ``np.nan`` value as this is inconsistent + with a set-item expression like ``df[mask] = None`` (:issue:`7656`) .. _whatsnew_0141.prior_deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 049d3b6a8578c..da9fb44f80b09 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -679,8 +679,8 @@ def to_gbq(self, destination_table, project_id=None, chunksize=10000, the defined table schema and column types. For simplicity, this method uses the Google BigQuery streaming API. The to_gbq method chunks data into a default chunk size of 10,000. Failures return the complete error - response which can be quite long depending on the size of the insert. 
-        There are several important limitations of the Google streaming API 
+        response which can be quite long depending on the size of the insert.
+        There are several important limitations of the Google streaming API
         which are detailed at:
         https://developers.google.com/bigquery/streaming-data-into-bigquery.
 
@@ -1925,11 +1925,7 @@ def _setitem_frame(self, key, value):
         if key.values.dtype != np.bool_:
             raise TypeError('Must pass DataFrame with boolean values only')
 
-        if self._is_mixed_type:
-            if not self._is_numeric_mixed_type:
-                raise TypeError(
-                    'Cannot do boolean setting on mixed-type frame')
-
+        self._check_inplace_setting(value)
         self._check_setitem_copy()
         self.where(-key, value, inplace=True)
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 756de479a471a..c88aced3de8a2 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1910,6 +1910,24 @@ def _is_datelike_mixed_type(self):
         f = lambda: self._data.is_datelike_mixed_type
         return self._protect_consolidate(f)
 
+    def _check_inplace_setting(self, value):
+        """ check whether in-place setting is allowed with this value; return True or raise TypeError """
+
+        if self._is_mixed_type:
+            if not self._is_numeric_mixed_type:
+
+                # on a non-numeric mixed-type object only an actual np.nan is let thru
+                try:
+                    if np.isnan(value):
+                        return True
+                except (TypeError, ValueError):
+                    pass  # np.isnan rejected the value, or its truth value is ambiguous
+
+                raise TypeError(
+                    'Cannot do inplace boolean setting on mixed-types with a non np.nan value')
+
+        return True
+
     def _protect_consolidate(self, f):
         blocks_before = len(self._data.blocks)
         result = f()
@@ -3214,6 +3232,8 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
         if inplace:
             # we may have different type blocks come out of putmask, so
             # reconstruct the block manager
+
+            self._check_inplace_setting(other)
             new_data = self._data.putmask(mask=cond, new=other, align=axis is None,
                                           inplace=True)
             self._update_inplace(new_data)
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 7368fcf8dac26..d7f8d235d4229 100644
--- a/pandas/tests/test_frame.py
+++ 
b/pandas/tests/test_frame.py @@ -9242,6 +9242,12 @@ def test_where_none(self): expected = DataFrame({'series': Series([0,1,2,3,4,5,6,7,np.nan,np.nan]) }) assert_frame_equal(df, expected) + # GH 7656 + df = DataFrame([{'A': 1, 'B': np.nan, 'C': 'Test'}, {'A': np.nan, 'B': 'Test', 'C': np.nan}]) + expected = df.where(~isnull(df), None) + with tm.assertRaisesRegexp(TypeError, 'boolean setting on mixed-type'): + df.where(~isnull(df), None, inplace=True) + def test_where_align(self): def create():