Skip to content

Commit 348afeb

Browse files
guygoldberg and jreback
authored and committed
ENH: Support inplace clip (#15388) (#16462)
1 parent d7962c5 commit 348afeb

File tree

3 files changed

+67
-18
lines changed

3 files changed

+67
-18
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ Other Enhancements
3636
- :func:`to_pickle` has gained a protocol parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL <https://docs.python.org/3/library/pickle.html#data-stream-format>`__
3737
- :func:`api.types.infer_dtype` now infers decimals. (:issue:`15690`)
3838
- :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`)
39+
- :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`)
3940

4041
.. _whatsnew_0210.api_breaking:
4142

pandas/core/generic.py

+37-14
Original file line numberDiff line numberDiff line change
@@ -4120,8 +4120,7 @@ def isnull(self):
41204120
def notnull(self):
41214121
return notnull(self).__finalize__(self)
41224122

4123-
def _clip_with_scalar(self, lower, upper):
4124-
4123+
def _clip_with_scalar(self, lower, upper, inplace=False):
41254124
if ((lower is not None and np.any(isnull(lower))) or
41264125
(upper is not None and np.any(isnull(upper)))):
41274126
raise ValueError("Cannot use an NA value as a clip threshold")
@@ -4137,10 +4136,16 @@ def _clip_with_scalar(self, lower, upper):
41374136
if np.any(mask):
41384137
result[mask] = np.nan
41394138

4140-
return self._constructor(
4141-
result, **self._construct_axes_dict()).__finalize__(self)
4139+
axes_dict = self._construct_axes_dict()
4140+
result = self._constructor(result, **axes_dict).__finalize__(self)
4141+
4142+
if inplace:
4143+
self._update_inplace(result)
4144+
else:
4145+
return result
41424146

4143-
def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
4147+
def clip(self, lower=None, upper=None, axis=None, inplace=False,
4148+
*args, **kwargs):
41444149
"""
41454150
Trim values at input threshold(s).
41464151
@@ -4150,6 +4155,9 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
41504155
upper : float or array_like, default None
41514156
axis : int or string axis name, optional
41524157
Align object with lower and upper along the given axis.
4158+
inplace : boolean, default False
4159+
Whether to perform the operation in place on the data
4160+
.. versionadded:: 0.21.0
41534161
41544162
Returns
41554163
-------
@@ -4192,6 +4200,8 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
41924200
if isinstance(self, ABCPanel):
41934201
raise NotImplementedError("clip is not supported yet for panels")
41944202

4203+
inplace = validate_bool_kwarg(inplace, 'inplace')
4204+
41954205
axis = nv.validate_clip_with_axis(axis, args, kwargs)
41964206

41974207
# GH 2747 (arguments were reversed)
@@ -4202,17 +4212,20 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
42024212
# fast-path for scalars
42034213
if ((lower is None or (is_scalar(lower) and is_number(lower))) and
42044214
(upper is None or (is_scalar(upper) and is_number(upper)))):
4205-
return self._clip_with_scalar(lower, upper)
4215+
return self._clip_with_scalar(lower, upper, inplace=inplace)
42064216

42074217
result = self
42084218
if lower is not None:
4209-
result = result.clip_lower(lower, axis)
4219+
result = result.clip_lower(lower, axis, inplace=inplace)
42104220
if upper is not None:
4211-
result = result.clip_upper(upper, axis)
4221+
if inplace:
4222+
result = self
4223+
4224+
result = result.clip_upper(upper, axis, inplace=inplace)
42124225

42134226
return result
42144227

4215-
def clip_upper(self, threshold, axis=None):
4228+
def clip_upper(self, threshold, axis=None, inplace=False):
42164229
"""
42174230
Return copy of input with values above given value(s) truncated.
42184231
@@ -4221,6 +4234,9 @@ def clip_upper(self, threshold, axis=None):
42214234
threshold : float or array_like
42224235
axis : int or string axis name, optional
42234236
Align object with threshold along the given axis.
4237+
inplace : boolean, default False
4238+
Whether to perform the operation in place on the data
4239+
.. versionadded:: 0.21.0
42244240
42254241
See Also
42264242
--------
@@ -4234,12 +4250,14 @@ def clip_upper(self, threshold, axis=None):
42344250
raise ValueError("Cannot use an NA value as a clip threshold")
42354251

42364252
if is_scalar(threshold) and is_number(threshold):
4237-
return self._clip_with_scalar(None, threshold)
4253+
return self._clip_with_scalar(None, threshold, inplace=inplace)
4254+
4255+
inplace = validate_bool_kwarg(inplace, 'inplace')
42384256

42394257
subset = self.le(threshold, axis=axis) | isnull(self)
4240-
return self.where(subset, threshold, axis=axis)
4258+
return self.where(subset, threshold, axis=axis, inplace=inplace)
42414259

4242-
def clip_lower(self, threshold, axis=None):
4260+
def clip_lower(self, threshold, axis=None, inplace=False):
42434261
"""
42444262
Return copy of the input with values below given value(s) truncated.
42454263
@@ -4248,6 +4266,9 @@ def clip_lower(self, threshold, axis=None):
42484266
threshold : float or array_like
42494267
axis : int or string axis name, optional
42504268
Align object with threshold along the given axis.
4269+
inplace : boolean, default False
4270+
Whether to perform the operation in place on the data
4271+
.. versionadded:: 0.21.0
42514272
42524273
See Also
42534274
--------
@@ -4261,10 +4282,12 @@ def clip_lower(self, threshold, axis=None):
42614282
raise ValueError("Cannot use an NA value as a clip threshold")
42624283

42634284
if is_scalar(threshold) and is_number(threshold):
4264-
return self._clip_with_scalar(threshold, None)
4285+
return self._clip_with_scalar(threshold, None, inplace=inplace)
4286+
4287+
inplace = validate_bool_kwarg(inplace, 'inplace')
42654288

42664289
subset = self.ge(threshold, axis=axis) | isnull(self)
4267-
return self.where(subset, threshold, axis=axis)
4290+
return self.where(subset, threshold, axis=axis, inplace=inplace)
42684291

42694292
def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
42704293
group_keys=True, squeeze=False, **kwargs):

pandas/tests/frame/test_analytics.py

+29-4
Original file line numberDiff line numberDiff line change
@@ -1807,6 +1807,7 @@ def test_built_in_round(self):
18071807

18081808
def test_clip(self):
18091809
median = self.frame.median().median()
1810+
original = self.frame.copy()
18101811

18111812
capped = self.frame.clip_upper(median)
18121813
assert not (capped.values > median).any()
@@ -1817,6 +1818,25 @@ def test_clip(self):
18171818
double = self.frame.clip(upper=median, lower=median)
18181819
assert not (double.values != median).any()
18191820

1821+
# Verify that self.frame was not changed inplace
1822+
assert (self.frame.values == original.values).all()
1823+
1824+
def test_inplace_clip(self):
1825+
# GH #15388
1826+
median = self.frame.median().median()
1827+
frame_copy = self.frame.copy()
1828+
1829+
frame_copy.clip_upper(median, inplace=True)
1830+
assert not (frame_copy.values > median).any()
1831+
frame_copy = self.frame.copy()
1832+
1833+
frame_copy.clip_lower(median, inplace=True)
1834+
assert not (frame_copy.values < median).any()
1835+
frame_copy = self.frame.copy()
1836+
1837+
frame_copy.clip(upper=median, lower=median, inplace=True)
1838+
assert not (frame_copy.values != median).any()
1839+
18201840
def test_dataframe_clip(self):
18211841
# GH #2747
18221842
df = DataFrame(np.random.randn(1000, 2))
@@ -1843,18 +1863,23 @@ def test_clip_mixed_numeric(self):
18431863
'B': [1., np.nan, 2.]})
18441864
tm.assert_frame_equal(result, expected, check_like=True)
18451865

1846-
def test_clip_against_series(self):
1866+
@pytest.mark.parametrize("inplace", [True, False])
1867+
def test_clip_against_series(self, inplace):
18471868
# GH #6966
18481869

18491870
df = DataFrame(np.random.randn(1000, 2))
18501871
lb = Series(np.random.randn(1000))
18511872
ub = lb + 1
18521873

1853-
clipped_df = df.clip(lb, ub, axis=0)
1874+
original = df.copy()
1875+
clipped_df = df.clip(lb, ub, axis=0, inplace=inplace)
1876+
1877+
if inplace:
1878+
clipped_df = df
18541879

18551880
for i in range(2):
1856-
lb_mask = df.iloc[:, i] <= lb
1857-
ub_mask = df.iloc[:, i] >= ub
1881+
lb_mask = original.iloc[:, i] <= lb
1882+
ub_mask = original.iloc[:, i] >= ub
18581883
mask = ~lb_mask & ~ub_mask
18591884

18601885
result = clipped_df.loc[lb_mask, i]

0 commit comments

Comments
 (0)