pandas-dev · jtratner · Oct 28, 2013 · Oct 17, 2013 · jreback · Oct 27, 2013
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -200,6 +200,11 @@ Improvements to existing features
     argument. (:issue:`5354`)
   - Added short docstrings to a few methods that were missing them + fixed the
     docstrings for Panel flex methods. (:issue:`5336`)
+  - ``NDFrame.drop()``, ``NDFrame.dropna()``, and ``.drop_duplicates()`` all
+    accept ``inplace`` as a keyword argument; however, this only means that the
+    wrapper is updated inplace: a copy is still made internally.
+    (:issue:`1960`, :issue:`5247`, and related :issue:`2325` [still not
+    closed])
 
 API Changes
 ~~~~~~~~~~~
@@ -474,6 +479,9 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
  - Unity ``dropna`` for Series/DataFrame signature (:issue:`5250`),
    tests from :issue:`5234`, courtesy of @rockg
  - Rewrite assert_almost_equal() in cython for performance (:issue:`4398`)
+ - Added an internal ``_update_inplace`` method to facilitate updating
+   ``NDFrame`` wrappers on inplace ops (this is only for the caller's
+   convenience; it doesn't actually prevent copies). (:issue:`5247`)
 
 .. _release.bug_fixes-0.13.0:
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2421,7 +2421,7 @@ def _maybe_cast(values, labels=None):
     #----------------------------------------------------------------------
     # Reindex-based selection methods
 
-    def dropna(self, axis=0, how='any', thresh=None, subset=None):
+    def dropna(self, axis=0, how='any', thresh=None, subset=None, inplace=False):
         """
         Return object with labels on given axis omitted where alternately any
         or all of the data are missing
@@ -2438,6 +2438,8 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None):
         subset : array-like
             Labels along other axis to consider, e.g. if you are dropping rows
             these would be a list of columns to include
+        inplace : bool, default False
+            If True, do operation inplace and return None.
 
         Returns
         -------
@@ -2448,31 +2450,36 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None):
             for ax in axis:
                 result = result.dropna(how=how, thresh=thresh,
                                        subset=subset, axis=ax)
-            return result
-
-        axis = self._get_axis_number(axis)
-        agg_axis = 1 - axis
-
-        agg_obj = self
-        if subset is not None:
-            agg_axis_name = self._get_axis_name(agg_axis)
-            agg_obj = self.reindex(**{agg_axis_name: subset})
+        else:
+            axis = self._get_axis_number(axis)
+            agg_axis = 1 - axis
+
+            agg_obj = self
+            if subset is not None:
+                agg_axis_name = self._get_axis_name(agg_axis)
+                agg_obj = self.reindex(**{agg_axis_name: subset})
+
+            count = agg_obj.count(axis=agg_axis)
+
+            if thresh is not None:
+                mask = count >= thresh
+            elif how == 'any':
+                mask = count == len(agg_obj._get_axis(agg_axis))
+            elif how == 'all':
+                mask = count > 0
+            else:
+                if how is not None:
+                    raise ValueError('invalid how option: %s' % how)
+                else:
+                    raise TypeError('must specify how or thresh')
 
-        count = agg_obj.count(axis=agg_axis)
+            result = self.take(mask.nonzero()[0], axis=axis, convert=False)
 
-        if thresh is not None:
-            mask = count >= thresh
-        elif how == 'any':
-            mask = count == len(agg_obj._get_axis(agg_axis))
-        elif how == 'all':
-            mask = count > 0
+        if inplace:
+            self._update_inplace(result)
         else:
-            if how is not None:
-                raise ValueError('invalid how option: %s' % how)
-            else:
-                raise TypeError('must specify how or thresh')
+            return result
 
-        return self.take(mask.nonzero()[0], axis=axis, convert=False)
 
     def drop_duplicates(self, cols=None, take_last=False, inplace=False):
         """

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1101,7 +1101,7 @@ def reindex_like(self, other, method=None, copy=True, limit=None):
         d = other._construct_axes_dict(method=method)
         return self.reindex(**d)
 
-    def drop(self, labels, axis=0, level=None):
+    def drop(self, labels, axis=0, level=None, inplace=False, **kwargs):
         """
         Return new object with labels in requested axis removed
 
@@ -1111,6 +1111,8 @@ def drop(self, labels, axis=0, level=None):
         axis : int or axis name
         level : int or name, default None
             For MultiIndex
+        inplace : bool, default False
+            If True, do operation inplace and return None.
 
         Returns
         -------
@@ -1132,7 +1134,7 @@ def drop(self, labels, axis=0, level=None):
                 dropped.axes[axis_].set_names(axis.names, inplace=True)
             except AttributeError:
                 pass
-            return dropped
+            result = dropped
 
         else:
             labels = com._index_labels_to_array(labels)
@@ -1147,7 +1149,20 @@ def drop(self, labels, axis=0, level=None):
             slicer = [slice(None)] * self.ndim
             slicer[self._get_axis_number(axis_name)] = indexer
 
-            return self.ix[tuple(slicer)]
+            result = self.ix[tuple(slicer)]
+
+        if inplace:
+            self._update_inplace(result)
+        else:
+            return result
+
+    def _update_inplace(self, result):
+        "replace self internals with result."
+        # NOTE: This does *not* call __finalize__ and that's an explicit
+        # decision that we may revisit in the future.
+        self._reset_cache()
+        self._data = result._data
+        self._maybe_update_cacher()
 
     def add_prefix(self, prefix):
         """

diff --git a/pandas/core/panel.py b/pandas/core/panel.py
@@ -615,7 +615,7 @@ def _reindex_multi(self, axes, copy, fill_value):
         return Panel(new_values, items=new_items, major_axis=new_major,
                      minor_axis=new_minor)
 
-    def dropna(self, axis=0, how='any', **kwargs):
+    def dropna(self, axis=0, how='any', inplace=False, **kwargs):
         """
         Drop 2D from panel, holding passed axis constant
 
@@ -627,6 +627,8 @@ def dropna(self, axis=0, how='any', **kwargs):
         how : {'all', 'any'}, default 'any'
             'any': one or more values are NA in the DataFrame along the
             axis. For 'all' they all must be.
+        inplace : bool, default False
+            If True, do operation inplace and return None.
 
         Returns
         -------
@@ -648,7 +650,11 @@ def dropna(self, axis=0, how='any', **kwargs):
             cond = mask == per_slice
 
         new_ax = self._get_axis(axis)[cond]
-        return self.reindex_axis(new_ax, axis=axis)
+        result = self.reindex_axis(new_ax, axis=axis)
+        if inplace:
+            self._update_inplace(result)
+        else:
+            return result
 
     def _combine(self, other, func, axis=0):
         if isinstance(other, Panel):

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1155,21 +1155,28 @@ def nunique(self):
         """
         return len(self.value_counts())
 
-    def drop_duplicates(self, take_last=False):
+    def drop_duplicates(self, take_last=False, inplace=False):
         """
         Return Series with duplicate values removed
 
         Parameters
         ----------
         take_last : boolean, default False
             Take the last observed index in a group. Default first
+        inplace : boolean, default False
+            If True, performs operation inplace and returns None.
 
         Returns
         -------
         deduplicated : Series
         """
         duplicated = self.duplicated(take_last=take_last)
-        return self[-duplicated]
+        result = self[-duplicated]
+        if inplace:
+            return self._update_inplace(result)
+        else:
+            return result
+
 
     def duplicated(self, take_last=False):
         """
@@ -2190,18 +2197,25 @@ def to_csv(self, path, index=True, sep=",", na_rep='',
                   index_label=index_label, mode=mode, nanRep=nanRep,
                   encoding=encoding, date_format=date_format)
 
-    def dropna(self, axis=0, **kwargs):
+    def dropna(self, axis=0, inplace=False, **kwargs):
         """
         Return Series without null values
 
         Returns
         -------
         valid : Series
+        inplace : bool, default False
+            If True, do operation inplace and return None.
         """
         axis = self._get_axis_number(axis or 0)
-        return remove_na(self)
+        result = remove_na(self)
+        if inplace:
+            self._update_inplace(result)
+        else:
+            return result
 
-    valid = lambda self: self.dropna()
+    valid = lambda self, inplace=False, **kwargs: self.dropna(inplace=inplace,
+                                                              **kwargs)
 
     def first_valid_index(self):
         """

diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py
@@ -569,13 +569,16 @@ def cumsum(self, axis=0, dtype=None, out=None):
             return self._constructor(new_array, index=self.index, sparse_index=new_array.sp_index).__finalize__(self)
         return Series(new_array, index=self.index).__finalize__(self)
 
-    def dropna(self, axis=0, **kwargs):
+    def dropna(self, axis=0, inplace=False, **kwargs):
         """
         Analogous to Series.dropna. If fill_value=NaN, returns a dense Series
         """
         # TODO: make more efficient
         axis = self._get_axis_number(axis or 0)
         dense_valid = self.to_dense().valid()
+        if inplace:
+            raise NotImplementedError("Cannot perform inplace dropna"
+                                      " operations on a SparseSeries")
         if isnull(self.fill_value):
             return dense_valid
         else: