Skip to content

Commit 1528859

Browse files
Evan Wright (evanpw)
Evan Wright
authored and committed
ENH: Allow clip, clip_lower, and clip_upper to use array-like thresholds (GH 6966)
1 parent 990972b commit 1528859

File tree

5 files changed

+115
-15
lines changed

5 files changed

+115
-15
lines changed

doc/source/whatsnew/v0.16.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Enhancements
2323

2424
- Added ``StringMethods.capitalize()`` and ``swapcase`` which behave the same as the standard ``str`` methods (:issue:`9766`)
2525
- Added ``StringMethods`` (.str accessor) to ``Index`` (:issue:`9068`)
26+
- Allow clip, clip_lower, and clip_upper to accept array-like arguments as thresholds (:issue:`6966`). These methods now have an ``axis`` parameter which determines how the Series or DataFrame will be aligned with the threshold(s).
2627

2728
The ``.str`` accessor is now available for both ``Series`` and ``Index``.
2829

pandas/core/generic.py

+62-14
Original file line numberDiff line numberDiff line change
@@ -2821,37 +2821,77 @@ def notnull(self):
28212821
"""
28222822
return notnull(self).__finalize__(self)
28232823

2824-
def clip(self, lower=None, upper=None, out=None):
2824+
def clip(self, lower=None, upper=None, out=None, axis=None):
28252825
"""
28262826
Trim values at input threshold(s)
28272827
28282828
Parameters
28292829
----------
2830-
lower : float, default None
2831-
upper : float, default None
2830+
lower : float or array_like, default None
2831+
upper : float or array_like, default None
2832+
axis : int or string axis name, optional
2833+
Align object with lower and upper along the given axis.
28322834
28332835
Returns
28342836
-------
28352837
clipped : Series
2838+
2839+
Examples
2840+
--------
2841+
>>> df
2842+
0 1
2843+
0 0.335232 -1.256177
2844+
1 -1.367855 0.746646
2845+
2 0.027753 -1.176076
2846+
3 0.230930 -0.679613
2847+
4 1.261967 0.570967
2848+
>>> df.clip(-1.0, 0.5)
2849+
0 1
2850+
0 0.335232 -1.000000
2851+
1 -1.000000 0.500000
2852+
2 0.027753 -1.000000
2853+
3 0.230930 -0.679613
2854+
4 0.500000 0.500000
2855+
>>> t
2856+
0 -0.3
2857+
1 -0.2
2858+
2 -0.1
2859+
3 0.0
2860+
4 0.1
2861+
dtype: float64
2862+
>>> df.clip(t, t + 1, axis=0)
2863+
0 1
2864+
0 0.335232 -0.300000
2865+
1 -0.200000 0.746646
2866+
2 0.027753 -0.100000
2867+
3 0.230930 0.000000
2868+
4 1.100000 0.570967
28362869
"""
28372870
if out is not None: # pragma: no cover
28382871
raise Exception('out argument is not supported yet')
28392872

28402873
# GH 2747 (arguments were reversed)
28412874
if lower is not None and upper is not None:
2842-
lower, upper = min(lower, upper), max(lower, upper)
2875+
if lib.isscalar(lower) and lib.isscalar(upper):
2876+
lower, upper = min(lower, upper), max(lower, upper)
28432877

28442878
result = self
28452879
if lower is not None:
2846-
result = result.clip_lower(lower)
2880+
result = result.clip_lower(lower, axis)
28472881
if upper is not None:
2848-
result = result.clip_upper(upper)
2882+
result = result.clip_upper(upper, axis)
28492883

28502884
return result
28512885

2852-
def clip_upper(self, threshold):
2886+
def clip_upper(self, threshold, axis=None):
28532887
"""
2854-
Return copy of input with values above given value truncated
2888+
Return copy of input with values above given value(s) truncated
2889+
2890+
Parameters
2891+
----------
2892+
threshold : float or array_like
2893+
axis : int or string axis name, optional
2894+
Align object with threshold along the given axis.
28552895
28562896
See also
28572897
--------
@@ -2861,14 +2901,21 @@ def clip_upper(self, threshold):
28612901
-------
28622902
clipped : same type as input
28632903
"""
2864-
if isnull(threshold):
2904+
if np.any(isnull(threshold)):
28652905
raise ValueError("Cannot use an NA value as a clip threshold")
28662906

2867-
return self.where((self <= threshold) | isnull(self), threshold)
2907+
subset = self.le(threshold, axis=axis) | isnull(self)
2908+
return self.where(subset, threshold, axis=axis)
28682909

2869-
def clip_lower(self, threshold):
2910+
def clip_lower(self, threshold, axis=None):
28702911
"""
2871-
Return copy of the input with values below given value truncated
2912+
Return copy of the input with values below given value(s) truncated
2913+
2914+
Parameters
2915+
----------
2916+
threshold : float or array_like
2917+
axis : int or string axis name, optional
2918+
Align object with threshold along the given axis.
28722919
28732920
See also
28742921
--------
@@ -2878,10 +2925,11 @@ def clip_lower(self, threshold):
28782925
-------
28792926
clipped : same type as input
28802927
"""
2881-
if isnull(threshold):
2928+
if np.any(isnull(threshold)):
28822929
raise ValueError("Cannot use an NA value as a clip threshold")
28832930

2884-
return self.where((self >= threshold) | isnull(self), threshold)
2931+
subset = self.ge(threshold, axis=axis) | isnull(self)
2932+
return self.where(subset, threshold, axis=axis)
28852933

28862934
def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
28872935
group_keys=True, squeeze=False):

pandas/core/ops.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -571,7 +571,11 @@ def na_op(x, y):
571571

572572
return result
573573

574-
def wrapper(self, other):
574+
def wrapper(self, other, axis=None):
575+
# Validate the axis parameter
576+
if axis is not None:
577+
self._get_axis_number(axis)
578+
575579
if isinstance(other, pd.Series):
576580
name = _maybe_match_name(self, other)
577581
if len(self) != len(other):

pandas/tests/test_frame.py

+33
Original file line numberDiff line numberDiff line change
@@ -11394,6 +11394,39 @@ def test_dataframe_clip(self):
1139411394
self.assertTrue((clipped_df.values[ub_mask] == ub).all() == True)
1139511395
self.assertTrue((clipped_df.values[mask] == df.values[mask]).all() == True)
1139611396

11397+
def test_clip_against_series(self):
11398+
# GH #6966
11399+
11400+
df = DataFrame(np.random.randn(1000, 2))
11401+
lb = Series(np.random.randn(1000))
11402+
ub = lb + 1
11403+
11404+
clipped_df = df.clip(lb, ub, axis=0)
11405+
11406+
for i in range(2):
11407+
lb_mask = df.iloc[:, i] <= lb
11408+
ub_mask = df.iloc[:, i] >= ub
11409+
mask = ~lb_mask & ~ub_mask
11410+
11411+
assert_series_equal(clipped_df.loc[lb_mask, i], lb[lb_mask])
11412+
assert_series_equal(clipped_df.loc[ub_mask, i], ub[ub_mask])
11413+
assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i])
11414+
11415+
def test_clip_against_frame(self):
11416+
df = DataFrame(np.random.randn(1000, 2))
11417+
lb = DataFrame(np.random.randn(1000, 2))
11418+
ub = lb + 1
11419+
11420+
clipped_df = df.clip(lb, ub)
11421+
11422+
lb_mask = df <= lb
11423+
ub_mask = df >= ub
11424+
mask = ~lb_mask & ~ub_mask
11425+
11426+
assert_frame_equal(clipped_df[lb_mask], lb[lb_mask])
11427+
assert_frame_equal(clipped_df[ub_mask], ub[ub_mask])
11428+
assert_frame_equal(clipped_df[mask], df[mask])
11429+
1139711430
def test_get_X_columns(self):
1139811431
# numeric and object columns
1139911432

pandas/tests/test_series.py

+14
Original file line numberDiff line numberDiff line change
@@ -5037,6 +5037,20 @@ def test_clip_types_and_nulls(self):
50375037
self.assertEqual(list(isnull(s)), list(isnull(l)))
50385038
self.assertEqual(list(isnull(s)), list(isnull(u)))
50395039

5040+
def test_clip_against_series(self):
5041+
# GH #6966
5042+
5043+
s = Series([1.0, 1.0, 4.0])
5044+
threshold = Series([1.0, 2.0, 3.0])
5045+
5046+
assert_series_equal(s.clip_lower(threshold), Series([1.0, 2.0, 4.0]))
5047+
assert_series_equal(s.clip_upper(threshold), Series([1.0, 1.0, 3.0]))
5048+
5049+
lower = Series([1.0, 2.0, 3.0])
5050+
upper = Series([1.5, 2.5, 3.5])
5051+
assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5]))
5052+
assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5]))
5053+
50405054
def test_valid(self):
50415055
ts = self.ts.copy()
50425056
ts[::2] = np.NaN

0 commit comments

Comments
 (0)