diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index c0408c4123eab..b99f3e2cd7059 100755 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -23,6 +23,7 @@ Enhancements - Added ``StringMethods.capitalize()`` and ``swapcase`` which behave as the same as standard ``str`` (:issue:`9766`) - Added ``StringMethods`` (.str accessor) to ``Index`` (:issue:`9068`) +- Allow clip, clip_lower, and clip_upper to accept array-like arguments as thresholds (:issue:`6966`). These methods now have an ``axis`` parameter which determines how the Series or DataFrame will be aligned with the threshold(s). The ``.str`` accessor is now available for both ``Series`` and ``Index``. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 681cfc0f7a416..d7defd2b417da 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2821,37 +2821,77 @@ def notnull(self): """ return notnull(self).__finalize__(self) - def clip(self, lower=None, upper=None, out=None): + def clip(self, lower=None, upper=None, out=None, axis=None): """ Trim values at input threshold(s) Parameters ---------- - lower : float, default None - upper : float, default None + lower : float or array_like, default None + upper : float or array_like, default None + axis : int or string axis name, optional + Align object with lower and upper along the given axis. Returns ------- clipped : Series + + Examples + -------- + >>> df + 0 1 + 0 0.335232 -1.256177 + 1 -1.367855 0.746646 + 2 0.027753 -1.176076 + 3 0.230930 -0.679613 + 4 1.261967 0.570967 + >>> df.clip(-1.0, 0.5) + 0 1 + 0 0.335232 -1.000000 + 1 -1.000000 0.500000 + 2 0.027753 -1.000000 + 3 0.230930 -0.679613 + 4 0.500000 0.500000 + >>> t + 0 -0.3 + 1 -0.2 + 2 -0.1 + 3 0.0 + 4 0.1 + dtype: float64 + >>> df.clip(t, t + 1, axis=0) + 0 1 + 0 0.335232 -0.300000 + 1 -0.200000 0.746646 + 2 0.027753 -0.100000 + 3 0.230930 0.000000 + 4 1.100000 0.570967 """ if out is not None: # pragma: no cover raise Exception('out argument is not supported yet') # GH 2747 (arguments were reversed) if lower is not None and upper is not None: - lower, upper = min(lower, upper), max(lower, upper) + if lib.isscalar(lower) and lib.isscalar(upper): + lower, upper = min(lower, upper), max(lower, upper) result = self if lower is not None: - result = result.clip_lower(lower) + result = result.clip_lower(lower, axis) if upper is not None: - result = result.clip_upper(upper) + result = result.clip_upper(upper, axis) return result - def clip_upper(self, threshold): + def clip_upper(self, threshold, axis=None): """ - Return copy of input with values above given value truncated + Return copy of input with values above given value(s) truncated + + Parameters + ---------- + threshold : float or array_like + axis : int or string axis name, optional + Align object with threshold along the given axis. See also -------- @@ -2861,14 +2901,21 @@ def clip_upper(self, threshold): ------- clipped : same type as input """ - if isnull(threshold): + if np.any(isnull(threshold)): raise ValueError("Cannot use an NA value as a clip threshold") - return self.where((self <= threshold) | isnull(self), threshold) + subset = self.le(threshold, axis=axis) | isnull(self) + return self.where(subset, threshold, axis=axis) - def clip_lower(self, threshold): + def clip_lower(self, threshold, axis=None): """ - Return copy of the input with values below given value truncated + Return copy of the input with values below given value(s) truncated + + Parameters + ---------- + threshold : float or array_like + axis : int or string axis name, optional + Align object with threshold along the given axis. See also -------- @@ -2878,10 +2925,11 @@ def clip_lower(self, threshold): ------- clipped : same type as input """ - if isnull(threshold): + if np.any(isnull(threshold)): raise ValueError("Cannot use an NA value as a clip threshold") - return self.where((self >= threshold) | isnull(self), threshold) + subset = self.ge(threshold, axis=axis) | isnull(self) + return self.where(subset, threshold, axis=axis) def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 2af9cd43faaef..a4c9bff3dd97f 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -571,7 +571,11 @@ def na_op(x, y): return result - def wrapper(self, other): + def wrapper(self, other, axis=None): + # Validate the axis parameter + if axis is not None: + self._get_axis_number(axis) + if isinstance(other, pd.Series): name = _maybe_match_name(self, other) if len(self) != len(other): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 5912ccb1494fe..e88bf9da9791d 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -11394,6 +11394,39 @@ def test_dataframe_clip(self): self.assertTrue((clipped_df.values[ub_mask] == ub).all() == True) self.assertTrue((clipped_df.values[mask] == df.values[mask]).all() == True) + def test_clip_against_series(self): + # GH #6966 + + df = DataFrame(np.random.randn(1000, 2)) + lb = Series(np.random.randn(1000)) + ub = lb + 1 + + clipped_df = df.clip(lb, ub, axis=0) + + for i in range(2): + lb_mask = df.iloc[:, i] <= lb + ub_mask = df.iloc[:, i] >= ub + mask = ~lb_mask & ~ub_mask + + assert_series_equal(clipped_df.loc[lb_mask, i], lb[lb_mask]) + assert_series_equal(clipped_df.loc[ub_mask, i], ub[ub_mask]) + assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i]) + + def test_clip_against_frame(self): + df = DataFrame(np.random.randn(1000, 2)) + lb = DataFrame(np.random.randn(1000, 2)) + ub = lb + 1 + + clipped_df = df.clip(lb, ub) + + lb_mask = df <= lb + ub_mask = df >= ub + mask = ~lb_mask & ~ub_mask + + assert_frame_equal(clipped_df[lb_mask], lb[lb_mask]) + assert_frame_equal(clipped_df[ub_mask], ub[ub_mask]) + assert_frame_equal(clipped_df[mask], df[mask]) + def test_get_X_columns(self): # numeric and object columns diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index f1a9e23796804..aa95986be0722 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -5037,6 +5037,20 @@ def test_clip_types_and_nulls(self): self.assertEqual(list(isnull(s)), list(isnull(l))) self.assertEqual(list(isnull(s)), list(isnull(u))) + def test_clip_against_series(self): + # GH #6966 + + s = Series([1.0, 1.0, 4.0]) + threshold = Series([1.0, 2.0, 3.0]) + + assert_series_equal(s.clip_lower(threshold), Series([1.0, 2.0, 4.0])) + assert_series_equal(s.clip_upper(threshold), Series([1.0, 1.0, 3.0])) + + lower = Series([1.0, 2.0, 3.0]) + upper = Series([1.5, 2.5, 3.5]) + assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5])) + assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5])) + def test_valid(self): ts = self.ts.copy() ts[::2] = np.NaN