Skip to content

Allow clip{,_lower,_upper} to use array-like thresholds (GH 6966) #9647

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 28, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.16.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Enhancements

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you changed the mode of this file, can you revert pls.

- Added ``StringMethods.capitalize()`` and ``swapcase`` which behave the same as standard ``str`` (:issue:`9766`)
- Added ``StringMethods`` (.str accessor) to ``Index`` (:issue:`9068`)
- Allow clip, clip_lower, and clip_upper to accept array-like arguments as thresholds (:issue:`6966`). These methods now have an ``axis`` parameter which determines how the Series or DataFrame will be aligned with the threshold(s).

The ``.str`` accessor is now available for both ``Series`` and ``Index``.

Expand Down
76 changes: 62 additions & 14 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2821,37 +2821,77 @@ def notnull(self):
"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pls make sure that you have core.filemode=False in git

e.g. git config --global --list (then turn it off). you generally don't change file modes (as they are set so that nose works correctly)

return notnull(self).__finalize__(self)

def clip(self, lower=None, upper=None, out=None):
def clip(self, lower=None, upper=None, out=None, axis=None):
"""
Trim values at input threshold(s)

Parameters
----------
lower : float, default None
upper : float, default None
lower : float or array_like, default None
upper : float or array_like, default None
axis : int or string axis name, optional
Align object with lower and upper along the given axis.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a doc-string example here (have a look at a function, say DataFrame.select_dtypes for how to format this in the doc-string)

Returns
-------
clipped : same type as input

Examples
--------
>>> df
0 1
0 0.335232 -1.256177
1 -1.367855 0.746646
2 0.027753 -1.176076
3 0.230930 -0.679613
4 1.261967 0.570967
>>> df.clip(-1.0, 0.5)
0 1
0 0.335232 -1.000000
1 -1.000000 0.500000
2 0.027753 -1.000000
3 0.230930 -0.679613
4 0.500000 0.500000
>>> t
0 -0.3
1 -0.2
2 -0.1
3 0.0
4 0.1
dtype: float64
>>> df.clip(t, t + 1, axis=0)
0 1
0 0.335232 -0.300000
1 -0.200000 0.746646
2 0.027753 -0.100000
3 0.230930 0.000000
4 1.100000 0.570967
"""
if out is not None: # pragma: no cover
raise Exception('out argument is not supported yet')

# GH 2747 (arguments were reversed)
if lower is not None and upper is not None:
lower, upper = min(lower, upper), max(lower, upper)
if lib.isscalar(lower) and lib.isscalar(upper):
lower, upper = min(lower, upper), max(lower, upper)

result = self
if lower is not None:
result = result.clip_lower(lower)
result = result.clip_lower(lower, axis)
if upper is not None:
result = result.clip_upper(upper)
result = result.clip_upper(upper, axis)

return result

def clip_upper(self, threshold):
def clip_upper(self, threshold, axis=None):
"""
Return copy of input with values above given value truncated
Return copy of input with values above given value(s) truncated

Parameters
----------
threshold : float or array_like
axis : int or string axis name, optional
Align object with threshold along the given axis.

See also
--------
Expand All @@ -2861,14 +2901,21 @@ def clip_upper(self, threshold):
-------
clipped : same type as input
"""
if isnull(threshold):
if np.any(isnull(threshold)):
raise ValueError("Cannot use an NA value as a clip threshold")

return self.where((self <= threshold) | isnull(self), threshold)
subset = self.le(threshold, axis=axis) | isnull(self)
return self.where(subset, threshold, axis=axis)

def clip_lower(self, threshold):
def clip_lower(self, threshold, axis=None):
"""
Return copy of the input with values below given value truncated
Return copy of the input with values below given value(s) truncated

Parameters
----------
threshold : float or array_like
axis : int or string axis name, optional
Align object with threshold along the given axis.

See also
--------
Expand All @@ -2878,10 +2925,11 @@ def clip_lower(self, threshold):
-------
clipped : same type as input
"""
if isnull(threshold):
if np.any(isnull(threshold)):
raise ValueError("Cannot use an NA value as a clip threshold")

return self.where((self >= threshold) | isnull(self), threshold)
subset = self.ge(threshold, axis=axis) | isnull(self)
return self.where(subset, threshold, axis=axis)

def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
group_keys=True, squeeze=False):
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,11 @@ def na_op(x, y):

return result

def wrapper(self, other):
def wrapper(self, other, axis=None):
# Validate the axis parameter
if axis is not None:
self._get_axis_number(axis)

if isinstance(other, pd.Series):
name = _maybe_match_name(self, other)
if len(self) != len(other):
Expand Down
33 changes: 33 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11394,6 +11394,39 @@ def test_dataframe_clip(self):
self.assertTrue((clipped_df.values[ub_mask] == ub).all() == True)
self.assertTrue((clipped_df.values[mask] == df.values[mask]).all() == True)

def test_clip_against_series(self):
# GH #6966

df = DataFrame(np.random.randn(1000, 2))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add the issue number as a comment here

lb = Series(np.random.randn(1000))
ub = lb + 1

clipped_df = df.clip(lb, ub, axis=0)

for i in range(2):
lb_mask = df.iloc[:, i] <= lb
ub_mask = df.iloc[:, i] >= ub
mask = ~lb_mask & ~ub_mask

assert_series_equal(clipped_df.loc[lb_mask, i], lb[lb_mask])
assert_series_equal(clipped_df.loc[ub_mask, i], ub[ub_mask])
assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i])

def test_clip_against_frame(self):
    # GH #6966
    # Clip a frame against lower/upper bounds that are themselves frames
    # of the same shape, then check each cell against the bound (if any)
    # that applies to it.
    frame = DataFrame(np.random.randn(1000, 2))
    lower = DataFrame(np.random.randn(1000, 2))
    upper = lower + 1

    result = frame.clip(lower, upper)

    at_or_below = frame <= lower
    at_or_above = frame >= upper
    # Cells strictly between the two bounds must pass through unchanged.
    untouched = ~(at_or_below | at_or_above)

    checks = (
        (at_or_below, lower),
        (at_or_above, upper),
        (untouched, frame),
    )
    for selector, expected in checks:
        assert_frame_equal(result[selector], expected[selector])

def test_get_X_columns(self):
# numeric and object columns

Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5037,6 +5037,20 @@ def test_clip_types_and_nulls(self):
self.assertEqual(list(isnull(s)), list(isnull(l)))
self.assertEqual(list(isnull(s)), list(isnull(u)))

def test_clip_against_series(self):
    # GH #6966
    # Thresholds given as a Series are compared position by position.
    values = Series([1.0, 1.0, 4.0])
    bound = Series([1.0, 2.0, 3.0])

    floored = values.clip_lower(bound)
    assert_series_equal(floored, Series([1.0, 2.0, 4.0]))

    capped = values.clip_upper(bound)
    assert_series_equal(capped, Series([1.0, 1.0, 3.0]))

    lower = Series([1.0, 2.0, 3.0])
    upper = Series([1.5, 2.5, 3.5])

    both_series = values.clip(lower, upper)
    assert_series_equal(both_series, Series([1.0, 2.0, 3.5]))

    # A scalar lower bound combined with a Series upper bound.
    scalar_and_series = values.clip(1.5, upper)
    assert_series_equal(scalar_and_series, Series([1.5, 1.5, 3.5]))

def test_valid(self):
ts = self.ts.copy()
ts[::2] = np.NaN
Expand Down