Merge pull request #8520 from jreback/indexing

jreback · jreback · commit 89e60925b173 · 2014-10-09T09:36:59.000-04:00
BUG: Bug in inplace operations with column sub-selections on the lhs (GH8511)
diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt
@@ -272,6 +272,46 @@ API changes
      df
      df.dtypes
 
+- In prior versions, updating a pandas object inplace would not reflect in other python references to this object. (:issue:`8511`,:issue:`5104`)
+
+  .. ipython:: python
+
+     s = Series([1, 2, 3])
+     s2 = s
+     s += 1.5
+
+  Behavior prior to v0.15.0
+
+  .. code-block:: python
+
+
+     # the original object
+     In [5]: s
+     Out[5]:
+     0    2.5
+     1    3.5
+     2    4.5
+     dtype: float64
+
+
+     # a reference to the original object
+     In [7]: s2
+     Out[7]:
+     0    1
+     1    2
+     2    3
+     dtype: int64
+
+  This is now the correct behavior
+
+  .. ipython:: python
+
+     # the original object
+     s
+
+     # a reference to the original object
+     s2
+
 - ``Series.to_csv()`` now returns a string when ``path=None``, matching the behaviour of ``DataFrame.to_csv()`` (:issue:`8215`).
 
 - ``read_hdf`` now raises ``IOError`` when a file that doesn't exist is passed in. Previously, a new, empty file was created, and a ``KeyError`` raised (:issue:`7715`).
@@ -954,7 +994,6 @@ Bug Fixes
 - Bug in ``DataFrame.reset_index`` which has ``MultiIndex`` contains ``PeriodIndex`` or ``DatetimeIndex`` with tz raises ``ValueError`` (:issue:`7746`, :issue:`7793`)
 
 
-
 - Bug in ``DataFrame.plot`` with ``subplots=True`` may draw unnecessary minor xticks and yticks (:issue:`7801`)
 - Bug in ``StataReader`` which did not read variable labels in 117 files due to difference between Stata documentation and implementation (:issue:`7816`)
 - Bug in ``StataReader`` where strings were always converted to 244 characters-fixed width irrespective of underlying string size (:issue:`7858`)
diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -538,6 +538,6 @@ def duplicated(self, take_last=False):
     #----------------------------------------------------------------------
     # abstracts
 
-    def _update_inplace(self, result):
+    def _update_inplace(self, result, **kwargs):
         raise NotImplementedError
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1108,9 +1108,21 @@ def _is_view(self):
         """ boolean : return if I am a view of another array """
         return self._data.is_view
 
-    def _maybe_update_cacher(self, clear=False):
-        """ see if we need to update our parent cacher
-            if clear, then clear our cache """
+    def _maybe_update_cacher(self, clear=False, verify_is_copy=True):
+        """
+
+        see if we need to update our parent cacher
+        if clear, then clear our cache
+
+        Parameters
+        ----------
+        clear : boolean, default False
+            clear the item cache
+        verify_is_copy : boolean, default True
+            provide is_copy checks
+
+        """
+
         cacher = getattr(self, '_cacher', None)
         if cacher is not None:
             ref = cacher[1]()
@@ -1125,8 +1137,8 @@ def _maybe_update_cacher(self, clear=False):
                 except:
                     pass
 
-        # check if we are a copy
-        self._check_setitem_copy(stacklevel=5, t='referant')
+        if verify_is_copy:
+            self._check_setitem_copy(stacklevel=5, t='referant')
 
         if clear:
             self._clear_item_cache()
@@ -1564,14 +1576,23 @@ def drop(self, labels, axis=0, level=None, inplace=False, **kwargs):
         else:
             return result
 
-    def _update_inplace(self, result):
-        "replace self internals with result."
+    def _update_inplace(self, result, verify_is_copy=True):
+        """
+        replace self internals with result.
+
+        Parameters
+        ----------
+        verify_is_copy : boolean, default True
+            provide is_copy checks
+
+        """
         # NOTE: This does *not* call __finalize__ and that's an explicit
         # decision that we may revisit in the future.
+
         self._reset_cache()
         self._clear_item_cache()
         self._data = getattr(result,'_data',result)
-        self._maybe_update_cacher()
+        self._maybe_update_cacher(verify_is_copy=verify_is_copy)
 
     def add_prefix(self, prefix):
         """
diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -220,7 +220,7 @@ def _simple_new(cls, values, name=None, **kwargs):
         result._reset_identity()
         return result
 
-    def _update_inplace(self, result):
+    def _update_inplace(self, result, **kwargs):
         # guard when called from IndexOpsMixin
         raise TypeError("Index can't be updated inplace")
 
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
@@ -161,20 +161,39 @@ def add_special_arithmetic_methods(cls, arith_method=None, radd_func=None,
         if passed, will not set functions with names in exclude
     """
     radd_func = radd_func or operator.add
+
     # in frame, special methods have default_axis = None, comp methods use
     # 'columns'
+
     new_methods = _create_methods(arith_method, radd_func, comp_method,
                                   bool_method, use_numexpr, default_axis=None,
                                   special=True)
 
     # inplace operators (I feel like these should get passed an `inplace=True`
     # or just be removed
+
+    def _wrap_inplace_method(method):
+        """
+        return an inplace wrapper for this method
+        """
+
+        def f(self, other):
+            result = method(self, other)
+
+            # this makes sure that we are aligned like the input
+            # we are updating inplace so we want to ignore is_copy
+            self._update_inplace(result.reindex_like(self,copy=False)._data,
+                                 verify_is_copy=False)
+
+            return self
+        return f
+
     new_methods.update(dict(
-        __iadd__=new_methods["__add__"],
-        __isub__=new_methods["__sub__"],
-        __imul__=new_methods["__mul__"],
-        __itruediv__=new_methods["__truediv__"],
-        __ipow__=new_methods["__pow__"]
+        __iadd__=_wrap_inplace_method(new_methods["__add__"]),
+        __isub__=_wrap_inplace_method(new_methods["__sub__"]),
+        __imul__=_wrap_inplace_method(new_methods["__mul__"]),
+        __itruediv__=_wrap_inplace_method(new_methods["__truediv__"]),
+        __ipow__=_wrap_inplace_method(new_methods["__pow__"]),
     ))
     if not compat.PY3:
         new_methods["__idiv__"] = new_methods["__div__"]
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -267,8 +267,9 @@ def _set_subtyp(self, is_all_dates):
         else:
             object.__setattr__(self, '_subtyp', 'series')
 
-    def _update_inplace(self, result):
-        return generic.NDFrame._update_inplace(self, result)
+    def _update_inplace(self, result, **kwargs):
+        # we want to call the generic version and not the IndexOpsMixin
+        return generic.NDFrame._update_inplace(self, result, **kwargs)
 
     # ndarray compatibility
     @property
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -249,6 +249,109 @@ def test_setitem_mulit_index(self):
         df[('joe', 'last')] = df[('jolie', 'first')].loc[i, j]
         assert_frame_equal(df[('joe', 'last')], df[('jolie', 'first')])
 
+    def test_inplace_ops_alignment(self):
+
+        # inplace ops / ops alignment
+        # GH 8511
+
+        columns = list('abcdefg')
+        X_orig = DataFrame(np.arange(10*len(columns)).reshape(-1,len(columns)), columns=columns, index=range(10))
+        Z = 100*X_orig.iloc[:,1:-1].copy()
+        block1 = list('bedcf')
+        subs = list('bcdef')
+
+        # add
+        X = X_orig.copy()
+        result1 = (X[block1] + Z).reindex(columns=subs)
+
+        X[block1] += Z
+        result2 = X.reindex(columns=subs)
+
+        X = X_orig.copy()
+        result3 = (X[block1] + Z[block1]).reindex(columns=subs)
+
+        X[block1] += Z[block1]
+        result4 = X.reindex(columns=subs)
+
+        assert_frame_equal(result1, result2)
+        assert_frame_equal(result1, result3)
+        assert_frame_equal(result1, result4)
+
+        # sub
+        X = X_orig.copy()
+        result1 = (X[block1] - Z).reindex(columns=subs)
+
+        X[block1] -= Z
+        result2 = X.reindex(columns=subs)
+
+        X = X_orig.copy()
+        result3 = (X[block1] - Z[block1]).reindex(columns=subs)
+
+        X[block1] -= Z[block1]
+        result4 = X.reindex(columns=subs)
+
+        assert_frame_equal(result1, result2)
+        assert_frame_equal(result1, result3)
+        assert_frame_equal(result1, result4)
+
+    def test_inplace_ops_identity(self):
+
+        # GH 5104
+        # make sure that we are actually changing the object
+        s_orig = Series([1, 2, 3])
+        df_orig = DataFrame(np.random.randint(0,5,size=10).reshape(-1,5))
+
+        # no dtype change
+        s = s_orig.copy()
+        s2 = s
+        s += 1
+        assert_series_equal(s,s2)
+        assert_series_equal(s_orig+1,s)
+        self.assertIs(s,s2)
+        self.assertIs(s._data,s2._data)
+
+        df = df_orig.copy()
+        df2 = df
+        df += 1
+        assert_frame_equal(df,df2)
+        assert_frame_equal(df_orig+1,df)
+        self.assertIs(df,df2)
+        self.assertIs(df._data,df2._data)
+
+        # dtype change
+        s = s_orig.copy()
+        s2 = s
+        s += 1.5
+        assert_series_equal(s,s2)
+        assert_series_equal(s_orig+1.5,s)
+
+        df = df_orig.copy()
+        df2 = df
+        df += 1.5
+        assert_frame_equal(df,df2)
+        assert_frame_equal(df_orig+1.5,df)
+        self.assertIs(df,df2)
+        self.assertIs(df._data,df2._data)
+
+        # mixed dtype
+        arr = np.random.randint(0,10,size=5)
+        df_orig = DataFrame({'A' : arr.copy(), 'B' : 'foo'})
+        df = df_orig.copy()
+        df2 = df
+        df['A'] += 1
+        expected = DataFrame({'A' : arr.copy()+1, 'B' : 'foo'})
+        assert_frame_equal(df,expected)
+        assert_frame_equal(df2,expected)
+        self.assertIs(df._data,df2._data)
+
+        df = df_orig.copy()
+        df2 = df
+        df['A'] += 1.5
+        expected = DataFrame({'A' : arr.copy()+1.5, 'B' : 'foo'})
+        assert_frame_equal(df,expected)
+        assert_frame_equal(df2,expected)
+        self.assertIs(df._data,df2._data)
+
     def test_getitem_boolean(self):
         # boolean indexing
         d = self.tsframe.index[10]
@@ -4979,7 +5082,6 @@ def test_div(self):
         self.assertFalse(np.array_equal(res.fillna(0), res2.fillna(0)))
 
     def test_logical_operators(self):
-        import operator
 
         def _check_bin_op(op):
             result = op(df1, df2)