Skip to content

Commit 73d619e

Browse files
Evan Wright (evanpw)
Evan Wright
authored and committed
ENH: Allow clip, clip_lower, and clip_upper to use array-like thresholds (GH 6966)
1 parent 3703f74 commit 73d619e

File tree

5 files changed

+115
-15
lines changed

5 files changed

+115
-15
lines changed

doc/source/whatsnew/v0.16.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Enhancements
1919

2020
- Added ``StringMethods.capitalize()`` and ``swapcase`` which behave the same as the standard ``str`` methods (:issue:`9766`)
2121
- Added ``StringMethods`` (.str accessor) to ``Index`` (:issue:`9068`)
22+
- Allow ``clip``, ``clip_lower``, and ``clip_upper`` to accept array-like arguments as thresholds (:issue:`6966`). These methods now have an ``axis`` parameter which determines how the Series or DataFrame will be aligned with the threshold(s).
2223

2324
The ``.str`` accessor is now available for both ``Series`` and ``Index``.
2425

pandas/core/generic.py

+62-14
Original file line numberDiff line numberDiff line change
@@ -2817,37 +2817,77 @@ def notnull(self):
28172817
"""
28182818
return notnull(self).__finalize__(self)
28192819

2820-
def clip(self, lower=None, upper=None, out=None):
2820+
def clip(self, lower=None, upper=None, out=None, axis=None):
28212821
"""
28222822
Trim values at input threshold(s)
28232823
28242824
Parameters
28252825
----------
2826-
lower : float, default None
2827-
upper : float, default None
2826+
lower : float or array_like, default None
2827+
upper : float or array_like, default None
2828+
axis : int or string axis name, optional
2829+
Align object with lower and upper along the given axis.
28282830
28292831
Returns
28302832
-------
28312833
clipped : Series
2834+
2835+
Examples
2836+
--------
2837+
>>> df
2838+
0 1
2839+
0 0.335232 -1.256177
2840+
1 -1.367855 0.746646
2841+
2 0.027753 -1.176076
2842+
3 0.230930 -0.679613
2843+
4 1.261967 0.570967
2844+
>>> df.clip(-1.0, 0.5)
2845+
0 1
2846+
0 0.335232 -1.000000
2847+
1 -1.000000 0.500000
2848+
2 0.027753 -1.000000
2849+
3 0.230930 -0.679613
2850+
4 0.500000 0.500000
2851+
>>> t
2852+
0 -0.3
2853+
1 -0.2
2854+
2 -0.1
2855+
3 0.0
2856+
4 0.1
2857+
dtype: float64
2858+
>>> df.clip(t, t + 1, axis=0)
2859+
0 1
2860+
0 0.335232 -0.300000
2861+
1 -0.200000 0.746646
2862+
2 0.027753 -0.100000
2863+
3 0.230930 0.000000
2864+
4 1.100000 0.570967
28322865
"""
28332866
if out is not None: # pragma: no cover
28342867
raise Exception('out argument is not supported yet')
28352868

28362869
# GH 2747 (arguments were reversed)
28372870
if lower is not None and upper is not None:
2838-
lower, upper = min(lower, upper), max(lower, upper)
2871+
if lib.isscalar(lower) and lib.isscalar(upper):
2872+
lower, upper = min(lower, upper), max(lower, upper)
28392873

28402874
result = self
28412875
if lower is not None:
2842-
result = result.clip_lower(lower)
2876+
result = result.clip_lower(lower, axis)
28432877
if upper is not None:
2844-
result = result.clip_upper(upper)
2878+
result = result.clip_upper(upper, axis)
28452879

28462880
return result
28472881

2848-
def clip_upper(self, threshold):
2882+
def clip_upper(self, threshold, axis=None):
28492883
"""
2850-
Return copy of input with values above given value truncated
2884+
Return copy of input with values above given value(s) truncated
2885+
2886+
Parameters
2887+
----------
2888+
threshold : float or array_like
2889+
axis : int or string axis name, optional
2890+
Align object with threshold along the given axis.
28512891
28522892
See also
28532893
--------
@@ -2857,14 +2897,21 @@ def clip_upper(self, threshold):
28572897
-------
28582898
clipped : same type as input
28592899
"""
2860-
if isnull(threshold):
2900+
if np.any(isnull(threshold)):
28612901
raise ValueError("Cannot use an NA value as a clip threshold")
28622902

2863-
return self.where((self <= threshold) | isnull(self), threshold)
2903+
subset = self.le(threshold, axis=axis) | isnull(self)
2904+
return self.where(subset, threshold, axis=axis)
28642905

2865-
def clip_lower(self, threshold):
2906+
def clip_lower(self, threshold, axis=None):
28662907
"""
2867-
Return copy of the input with values below given value truncated
2908+
Return copy of the input with values below given value(s) truncated
2909+
2910+
Parameters
2911+
----------
2912+
threshold : float or array_like
2913+
axis : int or string axis name, optional
2914+
Align object with threshold along the given axis.
28682915
28692916
See also
28702917
--------
@@ -2874,10 +2921,11 @@ def clip_lower(self, threshold):
28742921
-------
28752922
clipped : same type as input
28762923
"""
2877-
if isnull(threshold):
2924+
if np.any(isnull(threshold)):
28782925
raise ValueError("Cannot use an NA value as a clip threshold")
28792926

2880-
return self.where((self >= threshold) | isnull(self), threshold)
2927+
subset = self.ge(threshold, axis=axis) | isnull(self)
2928+
return self.where(subset, threshold, axis=axis)
28812929

28822930
def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
28832931
group_keys=True, squeeze=False):

pandas/core/ops.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -571,7 +571,11 @@ def na_op(x, y):
571571

572572
return result
573573

574-
def wrapper(self, other):
574+
def wrapper(self, other, axis=None):
575+
# Validate the axis parameter
576+
if axis is not None:
577+
self._get_axis_number(axis)
578+
575579
if isinstance(other, pd.Series):
576580
name = _maybe_match_name(self, other)
577581
if len(self) != len(other):

pandas/tests/test_frame.py

+33
Original file line numberDiff line numberDiff line change
@@ -11345,6 +11345,39 @@ def test_dataframe_clip(self):
1134511345
self.assertTrue((clipped_df.values[ub_mask] == ub).all() == True)
1134611346
self.assertTrue((clipped_df.values[mask] == df.values[mask]).all() == True)
1134711347

11348+
def test_clip_against_series(self):
11349+
# GH #6966
11350+
11351+
df = DataFrame(np.random.randn(1000, 2))
11352+
lb = Series(np.random.randn(1000))
11353+
ub = lb + 1
11354+
11355+
clipped_df = df.clip(lb, ub, axis=0)
11356+
11357+
for i in range(2):
11358+
lb_mask = df.iloc[:, i] <= lb
11359+
ub_mask = df.iloc[:, i] >= ub
11360+
mask = ~lb_mask & ~ub_mask
11361+
11362+
assert_series_equal(clipped_df.loc[lb_mask, i], lb[lb_mask])
11363+
assert_series_equal(clipped_df.loc[ub_mask, i], ub[ub_mask])
11364+
assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i])
11365+
11366+
def test_clip_against_frame(self):
11367+
df = DataFrame(np.random.randn(1000, 2))
11368+
lb = DataFrame(np.random.randn(1000, 2))
11369+
ub = lb + 1
11370+
11371+
clipped_df = df.clip(lb, ub)
11372+
11373+
lb_mask = df <= lb
11374+
ub_mask = df >= ub
11375+
mask = ~lb_mask & ~ub_mask
11376+
11377+
assert_frame_equal(clipped_df[lb_mask], lb[lb_mask])
11378+
assert_frame_equal(clipped_df[ub_mask], ub[ub_mask])
11379+
assert_frame_equal(clipped_df[mask], df[mask])
11380+
1134811381
def test_get_X_columns(self):
1134911382
# numeric and object columns
1135011383

pandas/tests/test_series.py

+14
Original file line numberDiff line numberDiff line change
@@ -5017,6 +5017,20 @@ def test_clip_types_and_nulls(self):
50175017
self.assertEqual(list(isnull(s)), list(isnull(l)))
50185018
self.assertEqual(list(isnull(s)), list(isnull(u)))
50195019

5020+
def test_clip_against_series(self):
5021+
# GH #6966
5022+
5023+
s = Series([1.0, 1.0, 4.0])
5024+
threshold = Series([1.0, 2.0, 3.0])
5025+
5026+
assert_series_equal(s.clip_lower(threshold), Series([1.0, 2.0, 4.0]))
5027+
assert_series_equal(s.clip_upper(threshold), Series([1.0, 1.0, 3.0]))
5028+
5029+
lower = Series([1.0, 2.0, 3.0])
5030+
upper = Series([1.5, 2.5, 3.5])
5031+
assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5]))
5032+
assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5]))
5033+
50205034
def test_valid(self):
50215035
ts = self.ts.copy()
50225036
ts[::2] = np.NaN

0 commit comments

Comments
 (0)