Skip to content

Commit f16141f

Browse files
jreback authored and TomAugspurger committed
PERF: improved clip performance (#16364)
closes #15400 (cherry picked from commit 42e2a87)
1 parent 3af2646 commit f16141f

File tree

4 files changed

+45
-3
lines changed

4 files changed

+45
-3
lines changed

asv_bench/benchmarks/series_methods.py

+11
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ def setup(self):
111111
def time_series_dropna_int64(self):
112112
self.s.dropna()
113113

114+
114115
class series_dropna_datetime(object):
115116
goal_time = 0.2
116117

@@ -120,3 +121,13 @@ def setup(self):
120121

121122
def time_series_dropna_datetime(self):
122123
self.s.dropna()
124+
125+
126+
class series_clip(object):
    """asv benchmark timing ``Series.clip`` with scalar lower/upper bounds."""

    goal_time = 0.2

    def setup(self):
        # Build the fixture once per benchmark run: 50 random floats.
        self.s = pd.Series(np.random.randn(50))

    def time_series_clip(self):
        # Named after the operation being timed; the previous name
        # (``time_series_dropna_datetime``) was copied from the dropna
        # benchmark and mislabelled the results.
        self.s.clip(0, 1)

doc/source/whatsnew/v0.20.2.txt

+1-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ Highlights include:
1919
Enhancements
2020
~~~~~~~~~~~~
2121

22-
- Unblocked access to additional compression types supported in pytables: 'blosc:blosclz, 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`)
22+
- Unblocked access to additional compression types supported in pytables: 'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`)
2323

2424
.. _whatsnew_0202.performance:
2525

@@ -30,7 +30,6 @@ Performance Improvements
3030
- Performance regression fix for MultiIndexes (:issue:`16319`, :issue:`16346`)
3131
- Improved performance of ``.clip()`` with scalar arguments (:issue:`15400`)
3232

33-
3433
.. _whatsnew_0202.bug_fixes:
3534

3635
Bug Fixes

pandas/core/generic.py

+32-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
_ensure_int64,
1515
needs_i8_conversion,
1616
is_scalar,
17+
is_number,
1718
is_integer, is_bool,
1819
is_bool_dtype,
1920
is_numeric_dtype,
@@ -4104,6 +4105,22 @@ def isnull(self):
41044105
def notnull(self):
41054106
return notnull(self).__finalize__(self)
41064107

4108+
def _clip_with_scalar(self, lower, upper):
    """
    Clip the underlying values to scalar bounds (fast path for ``clip``).

    Parameters
    ----------
    lower : scalar or None
        Minimum value; ``None`` means no lower bound is applied.
    upper : scalar or None
        Maximum value; ``None`` means no upper bound is applied.

    Returns
    -------
    clipped : same type as caller
        Built through ``self._constructor`` with the caller's axes and
        finalized from ``self``. Positions that were null in the input
        remain NaN in the result.

    Raises
    ------
    ValueError
        If ``lower`` or ``upper`` is an NA value.
    """
    if ((lower is not None and np.any(isnull(lower))) or
            (upper is not None and np.any(isnull(upper)))):
        raise ValueError("Cannot use an NA value as a clip threshold")

    result = self.values
    # Remember where the nulls are so they can be restored after clipping.
    mask = isnull(result)

    # Comparing NaN against the bounds may emit "invalid value"
    # RuntimeWarnings; they are irrelevant here because the null
    # positions are overwritten below.
    with np.errstate(all='ignore'):
        if upper is not None:
            result = np.where(result >= upper, upper, result)
        if lower is not None:
            result = np.where(result <= lower, lower, result)

    # Only write NaN back when there is something to restore: an integer
    # result array cannot hold NaN, and the assignment raises even when
    # the mask selects no elements.
    if np.any(mask):
        result[mask] = np.nan

    return self._constructor(
        result, **self._construct_axes_dict()).__finalize__(self)
4123+
41074124
def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
41084125
"""
41094126
Trim values at input threshold(s).
@@ -4122,26 +4139,29 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
41224139
Examples
41234140
--------
41244141
>>> df
4125-
0 1
4142+
0 1
41264143
0 0.335232 -1.256177
41274144
1 -1.367855 0.746646
41284145
2 0.027753 -1.176076
41294146
3 0.230930 -0.679613
41304147
4 1.261967 0.570967
4148+
41314149
>>> df.clip(-1.0, 0.5)
41324150
0 1
41334151
0 0.335232 -1.000000
41344152
1 -1.000000 0.500000
41354153
2 0.027753 -1.000000
41364154
3 0.230930 -0.679613
41374155
4 0.500000 0.500000
4156+
41384157
>>> t
41394158
0 -0.3
41404159
1 -0.2
41414160
2 -0.1
41424161
3 0.0
41434162
4 0.1
41444163
dtype: float64
4164+
41454165
>>> df.clip(t, t + 1, axis=0)
41464166
0 1
41474167
0 0.335232 -0.300000
@@ -4160,6 +4180,11 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
41604180
if is_scalar(lower) and is_scalar(upper):
41614181
lower, upper = min(lower, upper), max(lower, upper)
41624182

4183+
# fast-path for scalars
4184+
if ((lower is None or (is_scalar(lower) and is_number(lower))) and
4185+
(upper is None or (is_scalar(upper) and is_number(upper)))):
4186+
return self._clip_with_scalar(lower, upper)
4187+
41634188
result = self
41644189
if lower is not None:
41654190
result = result.clip_lower(lower, axis)
@@ -4189,6 +4214,9 @@ def clip_upper(self, threshold, axis=None):
41894214
if np.any(isnull(threshold)):
41904215
raise ValueError("Cannot use an NA value as a clip threshold")
41914216

4217+
if is_scalar(threshold) and is_number(threshold):
4218+
return self._clip_with_scalar(None, threshold)
4219+
41924220
subset = self.le(threshold, axis=axis) | isnull(self)
41934221
return self.where(subset, threshold, axis=axis)
41944222

@@ -4213,6 +4241,9 @@ def clip_lower(self, threshold, axis=None):
42134241
if np.any(isnull(threshold)):
42144242
raise ValueError("Cannot use an NA value as a clip threshold")
42154243

4244+
if is_scalar(threshold) and is_number(threshold):
4245+
return self._clip_with_scalar(threshold, None)
4246+
42164247
subset = self.ge(threshold, axis=axis) | isnull(self)
42174248
return self.where(subset, threshold, axis=axis)
42184249

pandas/tests/series/test_analytics.py

+1
Original file line numberDiff line numberDiff line change
@@ -1011,6 +1011,7 @@ def test_clip_against_series(self):
10111011

10121012
lower = Series([1.0, 2.0, 3.0])
10131013
upper = Series([1.5, 2.5, 3.5])
1014+
10141015
assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5]))
10151016
assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5]))
10161017

0 commit comments

Comments
 (0)