From f372c9ece6cc66c8d42397feacf5081b29787336 Mon Sep 17 00:00:00 2001 From: Christopher Whelan Date: Thu, 10 Jan 2019 13:34:29 -0800 Subject: [PATCH] PERF: fix some of .clip() performance regression by using numpy arrays where possible --- asv_bench/benchmarks/series_methods.py | 8 +++++--- pandas/core/generic.py | 18 ++++++++++++------ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index 5b0981dc10a8a..f7d0083b86a01 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -140,11 +140,13 @@ def time_map(self, mapper): class Clip(object): + params = [50, 1000, 10**5] + param_names = ['n'] - def setup(self): - self.s = Series(np.random.randn(50)) + def setup(self, n): + self.s = Series(np.random.randn(n)) - def time_clip(self): + def time_clip(self, n): self.s.clip(0, 1) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a0ee9cb253fef..2b97661fe9ec3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7148,12 +7148,18 @@ def _clip_with_scalar(self, lower, upper, inplace=False): raise ValueError("Cannot use an NA value as a clip threshold") result = self - if upper is not None: - subset = self.le(upper, axis=None) | isna(result) - result = result.where(subset, upper, axis=None, inplace=False) - if lower is not None: - subset = self.ge(lower, axis=None) | isna(result) - result = result.where(subset, lower, axis=None, inplace=False) + mask = isna(self.values) + + with np.errstate(all='ignore'): + if upper is not None: + subset = self.to_numpy() <= upper + result = result.where(subset, upper, axis=None, inplace=False) + if lower is not None: + subset = self.to_numpy() >= lower + result = result.where(subset, lower, axis=None, inplace=False) + + if np.any(mask): + result[mask] = np.nan if inplace: self._update_inplace(result)