Skip to content

Commit 71e2f5d

Browse files
committed
Merge pull request #5247 from jtratner/inplace-drop
ENH: Add inplace option to drop and dropna
2 parents 6f31fd1 + 4e0d29a commit 71e2f5d

File tree

9 files changed

+165
-40
lines changed

9 files changed

+165
-40
lines changed

doc/source/release.rst

+8
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,11 @@ Improvements to existing features
200200
argument. (:issue:`5354`)
201201
- Added short docstrings to a few methods that were missing them + fixed the
202202
docstrings for Panel flex methods. (:issue:`5336`)
203+
- ``NDFrame.drop()``, ``NDFrame.dropna()``, and ``.drop_duplicates()`` all
204+
accept ``inplace`` as a keyword argument; however, this only means that the
205+
wrapper is updated inplace, a copy is still made internally.
206+
(:issue:`1960`, :issue:`5247`, and related :issue:`2325` [still not
207+
closed])
203208

204209
API Changes
205210
~~~~~~~~~~~
@@ -474,6 +479,9 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
474479
- Unify ``dropna`` for Series/DataFrame signature (:issue:`5250`),
475480
tests from :issue:`5234`, courtesy of @rockg
476481
- Rewrite assert_almost_equal() in cython for performance (:issue:`4398`)
482+
- Added an internal ``_update_inplace`` method to facilitate updating
483+
``NDFrame`` wrappers on inplace ops (this is only for the convenience of the caller;
484+
it doesn't actually prevent copies). (:issue:`5247`)
477485

478486
.. _release.bug_fixes-0.13.0:
479487

pandas/core/frame.py

+29-22
Original file line numberDiff line numberDiff line change
@@ -2421,7 +2421,7 @@ def _maybe_cast(values, labels=None):
24212421
#----------------------------------------------------------------------
24222422
# Reindex-based selection methods
24232423

2424-
def dropna(self, axis=0, how='any', thresh=None, subset=None):
2424+
def dropna(self, axis=0, how='any', thresh=None, subset=None, inplace=False):
24252425
"""
24262426
Return object with labels on given axis omitted where alternately any
24272427
or all of the data are missing
@@ -2438,6 +2438,8 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None):
24382438
subset : array-like
24392439
Labels along other axis to consider, e.g. if you are dropping rows
24402440
these would be a list of columns to include
2441+
inplace : bool, default False
2442+
If True, do operation inplace and return None.
24412443
24422444
Returns
24432445
-------
@@ -2448,31 +2450,36 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None):
24482450
for ax in axis:
24492451
result = result.dropna(how=how, thresh=thresh,
24502452
subset=subset, axis=ax)
2451-
return result
2452-
2453-
axis = self._get_axis_number(axis)
2454-
agg_axis = 1 - axis
2455-
2456-
agg_obj = self
2457-
if subset is not None:
2458-
agg_axis_name = self._get_axis_name(agg_axis)
2459-
agg_obj = self.reindex(**{agg_axis_name: subset})
2453+
else:
2454+
axis = self._get_axis_number(axis)
2455+
agg_axis = 1 - axis
2456+
2457+
agg_obj = self
2458+
if subset is not None:
2459+
agg_axis_name = self._get_axis_name(agg_axis)
2460+
agg_obj = self.reindex(**{agg_axis_name: subset})
2461+
2462+
count = agg_obj.count(axis=agg_axis)
2463+
2464+
if thresh is not None:
2465+
mask = count >= thresh
2466+
elif how == 'any':
2467+
mask = count == len(agg_obj._get_axis(agg_axis))
2468+
elif how == 'all':
2469+
mask = count > 0
2470+
else:
2471+
if how is not None:
2472+
raise ValueError('invalid how option: %s' % how)
2473+
else:
2474+
raise TypeError('must specify how or thresh')
24602475

2461-
count = agg_obj.count(axis=agg_axis)
2476+
result = self.take(mask.nonzero()[0], axis=axis, convert=False)
24622477

2463-
if thresh is not None:
2464-
mask = count >= thresh
2465-
elif how == 'any':
2466-
mask = count == len(agg_obj._get_axis(agg_axis))
2467-
elif how == 'all':
2468-
mask = count > 0
2478+
if inplace:
2479+
self._update_inplace(result)
24692480
else:
2470-
if how is not None:
2471-
raise ValueError('invalid how option: %s' % how)
2472-
else:
2473-
raise TypeError('must specify how or thresh')
2481+
return result
24742482

2475-
return self.take(mask.nonzero()[0], axis=axis, convert=False)
24762483

24772484
def drop_duplicates(self, cols=None, take_last=False, inplace=False):
24782485
"""

pandas/core/generic.py

+18-3
Original file line numberDiff line numberDiff line change
@@ -1101,7 +1101,7 @@ def reindex_like(self, other, method=None, copy=True, limit=None):
11011101
d = other._construct_axes_dict(method=method)
11021102
return self.reindex(**d)
11031103

1104-
def drop(self, labels, axis=0, level=None):
1104+
def drop(self, labels, axis=0, level=None, inplace=False, **kwargs):
11051105
"""
11061106
Return new object with labels in requested axis removed
11071107
@@ -1111,6 +1111,8 @@ def drop(self, labels, axis=0, level=None):
11111111
axis : int or axis name
11121112
level : int or name, default None
11131113
For MultiIndex
1114+
inplace : bool, default False
1115+
If True, do operation inplace and return None.
11141116
11151117
Returns
11161118
-------
@@ -1132,7 +1134,7 @@ def drop(self, labels, axis=0, level=None):
11321134
dropped.axes[axis_].set_names(axis.names, inplace=True)
11331135
except AttributeError:
11341136
pass
1135-
return dropped
1137+
result = dropped
11361138

11371139
else:
11381140
labels = com._index_labels_to_array(labels)
@@ -1147,7 +1149,20 @@ def drop(self, labels, axis=0, level=None):
11471149
slicer = [slice(None)] * self.ndim
11481150
slicer[self._get_axis_number(axis_name)] = indexer
11491151

1150-
return self.ix[tuple(slicer)]
1152+
result = self.ix[tuple(slicer)]
1153+
1154+
if inplace:
1155+
self._update_inplace(result)
1156+
else:
1157+
return result
1158+
1159+
def _update_inplace(self, result):
1160+
"replace self internals with result."
1161+
# NOTE: This does *not* call __finalize__ and that's an explicit
1162+
# decision that we may revisit in the future.
1163+
self._reset_cache()
1164+
self._data = result._data
1165+
self._maybe_update_cacher()
11511166

11521167
def add_prefix(self, prefix):
11531168
"""

pandas/core/panel.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -615,7 +615,7 @@ def _reindex_multi(self, axes, copy, fill_value):
615615
return Panel(new_values, items=new_items, major_axis=new_major,
616616
minor_axis=new_minor)
617617

618-
def dropna(self, axis=0, how='any', **kwargs):
618+
def dropna(self, axis=0, how='any', inplace=False, **kwargs):
619619
"""
620620
Drop 2D from panel, holding passed axis constant
621621
@@ -627,6 +627,8 @@ def dropna(self, axis=0, how='any', **kwargs):
627627
how : {'all', 'any'}, default 'any'
628628
'any': one or more values are NA in the DataFrame along the
629629
axis. For 'all' they all must be.
630+
inplace : bool, default False
631+
If True, do operation inplace and return None.
630632
631633
Returns
632634
-------
@@ -648,7 +650,11 @@ def dropna(self, axis=0, how='any', **kwargs):
648650
cond = mask == per_slice
649651

650652
new_ax = self._get_axis(axis)[cond]
651-
return self.reindex_axis(new_ax, axis=axis)
653+
result = self.reindex_axis(new_ax, axis=axis)
654+
if inplace:
655+
self._update_inplace(result)
656+
else:
657+
return result
652658

653659
def _combine(self, other, func, axis=0):
654660
if isinstance(other, Panel):

pandas/core/series.py

+19-5
Original file line numberDiff line numberDiff line change
@@ -1155,21 +1155,28 @@ def nunique(self):
11551155
"""
11561156
return len(self.value_counts())
11571157

1158-
def drop_duplicates(self, take_last=False):
1158+
def drop_duplicates(self, take_last=False, inplace=False):
11591159
"""
11601160
Return Series with duplicate values removed
11611161
11621162
Parameters
11631163
----------
11641164
take_last : boolean, default False
11651165
Take the last observed index in a group. Default first
1166+
inplace : boolean, default False
1167+
If True, performs operation inplace and returns None.
11661168
11671169
Returns
11681170
-------
11691171
deduplicated : Series
11701172
"""
11711173
duplicated = self.duplicated(take_last=take_last)
1172-
return self[-duplicated]
1174+
result = self[-duplicated]
1175+
if inplace:
1176+
return self._update_inplace(result)
1177+
else:
1178+
return result
1179+
11731180

11741181
def duplicated(self, take_last=False):
11751182
"""
@@ -2190,18 +2197,25 @@ def to_csv(self, path, index=True, sep=",", na_rep='',
21902197
index_label=index_label, mode=mode, nanRep=nanRep,
21912198
encoding=encoding, date_format=date_format)
21922199

2193-
def dropna(self, axis=0, **kwargs):
2200+
def dropna(self, axis=0, inplace=False, **kwargs):
21942201
"""
21952202
Return Series without null values
21962203
21972204
Returns
21982205
-------
21992206
valid : Series
2207+
inplace : bool (default False)
2208+
Do operation in place.
22002209
"""
22012210
axis = self._get_axis_number(axis or 0)
2202-
return remove_na(self)
2211+
result = remove_na(self)
2212+
if inplace:
2213+
self._update_inplace(result)
2214+
else:
2215+
return result
22032216

2204-
valid = lambda self: self.dropna()
2217+
valid = lambda self, inplace=False, **kwargs: self.dropna(inplace=inplace,
2218+
**kwargs)
22052219

22062220
def first_valid_index(self):
22072221
"""

pandas/sparse/series.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -569,13 +569,16 @@ def cumsum(self, axis=0, dtype=None, out=None):
569569
return self._constructor(new_array, index=self.index, sparse_index=new_array.sp_index).__finalize__(self)
570570
return Series(new_array, index=self.index).__finalize__(self)
571571

572-
def dropna(self, axis=0, **kwargs):
572+
def dropna(self, axis=0, inplace=False, **kwargs):
573573
"""
574574
Analogous to Series.dropna. If fill_value=NaN, returns a dense Series
575575
"""
576576
# TODO: make more efficient
577577
axis = self._get_axis_number(axis or 0)
578578
dense_valid = self.to_dense().valid()
579+
if inplace:
580+
raise NotImplementedError("Cannot perform inplace dropna"
581+
" operations on a SparseSeries")
579582
if isnull(self.fill_value):
580583
return dense_valid
581584
else:

0 commit comments

Comments
 (0)