Skip to content

Commit e5fd3e0

Browse files
yl2526 authored and jreback committed
BUG: clip dataframe column-wise #15390 (#16504)
1 parent 9462379 commit e5fd3e0

File tree

4 files changed

+69
-22
lines changed

4 files changed

+69
-22
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ Reshaping
135135

136136
Numeric
137137
^^^^^^^
138+
- Bug in ``.clip()`` where passing ``axis=1`` together with a list-like ``threshold`` raised ``ValueError`` (:issue:`15390`)
138139

139140

140141
Categorical

pandas/core/generic.py

+33-20
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
from pandas.compat.numpy import function as nv
5353
from pandas.compat import (map, zip, lzip, lrange, string_types,
5454
isidentifier, set_function_name, cPickle as pkl)
55+
from pandas.core.ops import _align_method_FRAME
5556
import pandas.core.nanops as nanops
5657
from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
5758
from pandas.util._validators import validate_bool_kwarg
@@ -4413,6 +4414,34 @@ def _clip_with_scalar(self, lower, upper, inplace=False):
44134414
else:
44144415
return result
44154416

4417+
def _clip_with_one_bound(self, threshold, method, axis, inplace):
    """
    Clip values against a single (lower or upper) bound.

    Shared implementation used by ``clip_upper`` and ``clip_lower``.

    Parameters
    ----------
    threshold : scalar or list-like
        The bound to clip against.
    method : bound method
        ``self.le`` when ``threshold`` is an upper bound, ``self.ge`` when
        it is a lower bound.
    axis : axis name, axis number or None
        Axis along which a list-like ``threshold`` is compared and aligned.
    inplace : bool
        Validated here and forwarded to the clipping call.

    Returns
    -------
    The result of ``_clip_with_scalar`` (numeric scalar threshold) or of
    ``where`` (any other threshold) for the given ``inplace``.

    Raises
    ------
    ValueError
        If ``threshold`` contains an NA value.
    """
    inplace = validate_bool_kwarg(inplace, 'inplace')
    if axis is not None:
        axis = self._get_axis_number(axis)

    if np.any(isnull(threshold)):
        raise ValueError("Cannot use an NA value as a clip threshold")

    # method is self.le for upper bound and self.ge for lower bound
    if is_scalar(threshold) and is_number(threshold):
        if method.__name__ == 'le':
            return self._clip_with_scalar(None, threshold, inplace=inplace)
        return self._clip_with_scalar(threshold, None, inplace=inplace)

    # Keep values that already satisfy the bound; NA entries are kept too
    # so that they are never replaced by the threshold.
    subset = method(threshold, axis=axis) | isnull(self)

    # GH #15390
    # In order for where method to work, the threshold must
    # be transformed to NDFrame from other array like structure.
    # Thresholds that already are NDFrame objects (Series, DataFrame) are
    # passed through untouched: converting them with np.asarray would drop
    # their labels and re-align them purely by position.
    # NOTE(review): relies on NDFrame being in scope in this module.
    already_labelled = isinstance(threshold, (ABCSeries, NDFrame))
    if is_list_like(threshold) and not already_labelled:
        if isinstance(self, ABCSeries):
            threshold = pd.Series(threshold, index=self.index)
        else:
            threshold = _align_method_FRAME(self, np.asarray(threshold),
                                            axis)
    return self.where(subset, threshold, axis=axis, inplace=inplace)
4444+
44164445
def clip(self, lower=None, upper=None, axis=None, inplace=False,
44174446
*args, **kwargs):
44184447
"""
@@ -4515,16 +4544,8 @@ def clip_upper(self, threshold, axis=None, inplace=False):
45154544
-------
45164545
clipped : same type as input
45174546
"""
4518-
if np.any(isnull(threshold)):
4519-
raise ValueError("Cannot use an NA value as a clip threshold")
4520-
4521-
if is_scalar(threshold) and is_number(threshold):
4522-
return self._clip_with_scalar(None, threshold, inplace=inplace)
4523-
4524-
inplace = validate_bool_kwarg(inplace, 'inplace')
4525-
4526-
subset = self.le(threshold, axis=axis) | isnull(self)
4527-
return self.where(subset, threshold, axis=axis, inplace=inplace)
4547+
return self._clip_with_one_bound(threshold, method=self.le,
4548+
axis=axis, inplace=inplace)
45284549

45294550
def clip_lower(self, threshold, axis=None, inplace=False):
45304551
"""
@@ -4547,16 +4568,8 @@ def clip_lower(self, threshold, axis=None, inplace=False):
45474568
-------
45484569
clipped : same type as input
45494570
"""
4550-
if np.any(isnull(threshold)):
4551-
raise ValueError("Cannot use an NA value as a clip threshold")
4552-
4553-
if is_scalar(threshold) and is_number(threshold):
4554-
return self._clip_with_scalar(threshold, None, inplace=inplace)
4555-
4556-
inplace = validate_bool_kwarg(inplace, 'inplace')
4557-
4558-
subset = self.ge(threshold, axis=axis) | isnull(self)
4559-
return self.where(subset, threshold, axis=axis, inplace=inplace)
4571+
return self._clip_with_one_bound(threshold, method=self.ge,
4572+
axis=axis, inplace=inplace)
45604573

45614574
def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
45624575
group_keys=True, squeeze=False, **kwargs):

pandas/tests/frame/test_analytics.py

+23-2
Original file line numberDiff line numberDiff line change
@@ -1892,12 +1892,33 @@ def test_clip_against_series(self, inplace):
18921892

18931893
tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i])
18941894

1895-
def test_clip_against_frame(self):
1895+
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize("lower", [[2, 3, 4], np.asarray([2, 3, 4])])
@pytest.mark.parametrize("axis,res", [
    (0, [[2., 2., 3.], [4., 5., 6.], [7., 7., 7.]]),
    (1, [[2., 3., 4.], [4., 5., 6.], [5., 6., 7.]])
])
def test_clip_against_list_like(self, inplace, lower, axis, res):
    # GH #15390
    # Clip a copy of the fixture frame between a list-like lower bound
    # (list or ndarray) and a list upper bound, along the given axis.
    frame = self.simple.copy(deep=True)
    expected = pd.DataFrame(res,
                            index=frame.index,
                            columns=frame.columns)

    clipped = frame.clip(lower=lower, upper=[5, 6, 7],
                         axis=axis, inplace=inplace)

    # For the in-place variant the frame itself is what gets checked.
    actual = frame if inplace else clipped
    tm.assert_frame_equal(actual, expected, check_exact=True)
1914+
1915+
@pytest.mark.parametrize("axis", [0, 1, None])
1916+
def test_clip_against_frame(self, axis):
18961917
df = DataFrame(np.random.randn(1000, 2))
18971918
lb = DataFrame(np.random.randn(1000, 2))
18981919
ub = lb + 1
18991920

1900-
clipped_df = df.clip(lb, ub)
1921+
clipped_df = df.clip(lb, ub, axis=axis)
19011922

19021923
lb_mask = df <= lb
19031924
ub_mask = df >= ub

pandas/tests/series/test_analytics.py

+12
Original file line numberDiff line numberDiff line change
@@ -1015,6 +1015,18 @@ def test_clip_against_series(self):
10151015
assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5]))
10161016
assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5]))
10171017

1018+
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])])
def test_clip_against_list_like(self, inplace, upper):
    # GH #15390
    # Clip a series against a list-like upper bound (list or ndarray).
    ser = pd.Series([5, 6, 7])
    clipped = ser.clip(upper=upper, inplace=inplace)

    # For the in-place variant the series itself is what gets checked.
    actual = ser if inplace else clipped
    tm.assert_series_equal(actual, pd.Series([1, 2, 3]), check_exact=True)
1029+
10181030
def test_clip_with_datetimes(self):
10191031

10201032
# GH 11838

0 commit comments

Comments
 (0)