Skip to content

Commit 14a315c

Browse files
authored
PERF: DataFrame.clip / Series.clip (pandas-dev#51472)
1 parent f5405b5 commit 14a315c

File tree

5 files changed

+49
-20
lines changed

5 files changed

+49
-20
lines changed

asv_bench/benchmarks/frame_methods.py

+15
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,21 @@
1717
from .pandas_vb_common import tm
1818

1919

20+
class Clip:
21+
params = [
22+
["float64", "Float64", "float64[pyarrow]"],
23+
]
24+
param_names = ["dtype"]
25+
26+
def setup(self, dtype):
27+
data = np.random.randn(100_000, 10)
28+
df = DataFrame(data, dtype=dtype)
29+
self.df = df
30+
31+
def time_clip(self, dtype):
32+
self.df.clip(-1.0, 1.0)
33+
34+
2035
class GetNumericData:
2136
def setup(self):
2237
self.df = DataFrame(np.random.randn(10000, 25))

doc/source/whatsnew/v2.1.0.rst

+1-1
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ Deprecations
100100

101101
Performance improvements
102102
~~~~~~~~~~~~~~~~~~~~~~~~
103-
-
103+
- Performance improvement in :meth:`DataFrame.clip` and :meth:`Series.clip` (:issue:`51472`)
104104
-
105105

106106
.. ---------------------------------------------------------------------------

pandas/core/generic.py

+20-11
Original file line number | Diff line number | Diff line change
@@ -7985,24 +7985,33 @@ def _clip_with_scalar(self, lower, upper, inplace: bool_t = False):
79857985
):
79867986
raise ValueError("Cannot use an NA value as a clip threshold")
79877987

7988-
result = self
7989-
mask = isna(self._values)
7988+
mgr = self._mgr
79907989

7991-
with np.errstate(all="ignore"):
7990+
if inplace:
7991+
# cond (for putmask) identifies values to be updated.
7992+
# exclude boundary as values at the boundary should be no-ops.
79927993
if upper is not None:
7993-
subset = self <= upper
7994-
result = result.where(subset, upper, axis=None, inplace=False)
7994+
cond = self > upper
7995+
mgr = mgr.putmask(mask=cond, new=upper, align=False)
79957996
if lower is not None:
7996-
subset = self >= lower
7997-
result = result.where(subset, lower, axis=None, inplace=False)
7998-
7999-
if np.any(mask):
8000-
result[mask] = np.nan
7997+
cond = self < lower
7998+
mgr = mgr.putmask(mask=cond, new=lower, align=False)
7999+
else:
8000+
# cond (for where) identifies values to be left as-is.
8001+
# include boundary as values at the boundary should be no-ops.
8002+
mask = isna(self)
8003+
if upper is not None:
8004+
cond = mask | (self <= upper)
8005+
mgr = mgr.where(other=upper, cond=cond, align=False)
8006+
if lower is not None:
8007+
cond = mask | (self >= lower)
8008+
mgr = mgr.where(other=lower, cond=cond, align=False)
80018009

8010+
result = self._constructor(mgr)
80028011
if inplace:
80038012
return self._update_inplace(result)
80048013
else:
8005-
return result
8014+
return result.__finalize__(self)
80068015

80078016
@final
80088017
def _clip_with_one_bound(self, threshold, method, axis, inplace):

pandas/tests/copy_view/test_clip.py

+6-8
Original file line number | Diff line number | Diff line change
@@ -12,13 +12,13 @@ def test_clip_inplace_reference(using_copy_on_write):
1212
view = df[:]
1313
df.clip(lower=2, inplace=True)
1414

15-
# Clip not actually inplace right now but could be
16-
assert not np.shares_memory(get_array(df, "a"), arr_a)
17-
1815
if using_copy_on_write:
16+
assert not np.shares_memory(get_array(df, "a"), arr_a)
1917
assert df._mgr._has_no_reference(0)
2018
assert view._mgr._has_no_reference(0)
2119
tm.assert_frame_equal(df_copy, view)
20+
else:
21+
assert np.shares_memory(get_array(df, "a"), arr_a)
2222

2323

2424
def test_clip_inplace_reference_no_op(using_copy_on_write):
@@ -28,22 +28,20 @@ def test_clip_inplace_reference_no_op(using_copy_on_write):
2828
view = df[:]
2929
df.clip(lower=0, inplace=True)
3030

31+
assert np.shares_memory(get_array(df, "a"), arr_a)
32+
3133
if using_copy_on_write:
32-
assert np.shares_memory(get_array(df, "a"), arr_a)
3334
assert not df._mgr._has_no_reference(0)
3435
assert not view._mgr._has_no_reference(0)
3536
tm.assert_frame_equal(df_copy, view)
36-
else:
37-
assert not np.shares_memory(get_array(df, "a"), arr_a)
3837

3938

4039
def test_clip_inplace(using_copy_on_write):
4140
df = DataFrame({"a": [1.5, 2, 3]})
4241
arr_a = get_array(df, "a")
4342
df.clip(lower=2, inplace=True)
4443

45-
# Clip not actually inplace right now but could be
46-
assert not np.shares_memory(get_array(df, "a"), arr_a)
44+
assert np.shares_memory(get_array(df, "a"), arr_a)
4745

4846
if using_copy_on_write:
4947
assert df._mgr._has_no_reference(0)

pandas/tests/frame/methods/test_clip.py

+7
Original file line number | Diff line number | Diff line change
@@ -164,3 +164,10 @@ def test_clip_with_na_args(self, float_frame):
164164
result = df.clip(lower=t, axis=0)
165165
expected = DataFrame({"col_0": [9, -3, 0, 6, 5], "col_1": [2, -4, 6, 8, 3]})
166166
tm.assert_frame_equal(result, expected)
167+
168+
def test_clip_int_data_with_float_bound(self):
169+
# GH51472
170+
df = DataFrame({"a": [1, 2, 3]})
171+
result = df.clip(lower=1.5)
172+
expected = DataFrame({"a": [1.5, 2.0, 3.0]})
173+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)