Skip to content

Commit cee6475

Browse files
DriesSchaumontJulianWgs
authored and committed
Fix 40420: Interpret NaN in clip() as no bound. (pandas-dev#40927)
1 parent 48e7119 commit cee6475

File tree

4 files changed

+64
-11
lines changed

4 files changed

+64
-11
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,7 @@ Other
910910
- Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`)
911911
- Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`)
912912
- Bug in :meth:`DataFrame.convert_dtypes` incorrectly raised ValueError when called on an empty DataFrame (:issue:`40393`)
913+
- Bug in :meth:`DataFrame.clip` not interpreting missing values in the ``lower`` and ``upper`` thresholds as no threshold (:issue:`40420`)
913914

914915
.. ---------------------------------------------------------------------------
915916

pandas/core/generic.py

+45-6
Original file line numberDiff line numberDiff line change
@@ -7341,8 +7341,6 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace):
73417341
return self._clip_with_scalar(None, threshold, inplace=inplace)
73427342
return self._clip_with_scalar(threshold, None, inplace=inplace)
73437343

7344-
subset = method(threshold, axis=axis) | isna(self)
7345-
73467344
# GH #15390
73477345
# In order for where method to work, the threshold must
73487346
# be transformed to NDFrame from other array like structure.
@@ -7351,6 +7349,18 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace):
73517349
threshold = self._constructor(threshold, index=self.index)
73527350
else:
73537351
threshold = align_method_FRAME(self, threshold, axis, flex=None)[1]
7352+
7353+
# GH 40420
7354+
# Treat missing thresholds as no bounds, not clipping the values
7355+
if is_list_like(threshold):
7356+
fill_value = np.inf if method.__name__ == "le" else -np.inf
7357+
threshold_inf = threshold.fillna(fill_value)
7358+
else:
7359+
threshold_inf = threshold
7360+
7361+
subset = method(threshold_inf, axis=axis) | isna(self)
7362+
7363+
# GH 40420
73547364
return self.where(subset, threshold, axis=axis, inplace=inplace)
73557365

73567366
@overload
@@ -7482,10 +7492,12 @@ def clip(
74827492
----------
74837493
lower : float or array_like, default None
74847494
Minimum threshold value. All values below this
7485-
threshold will be set to it.
7495+
threshold will be set to it. A missing
7496+
threshold (e.g. `NA`) will not clip the value.
74867497
upper : float or array_like, default None
74877498
Maximum threshold value. All values above this
7488-
threshold will be set to it.
7499+
threshold will be set to it. A missing
7500+
threshold (e.g. `NA`) will not clip the value.
74897501
axis : int or str axis name, optional
74907502
Align object with lower and upper along the given axis.
74917503
inplace : bool, default False
@@ -7546,6 +7558,25 @@ def clip(
75467558
2 0 3
75477559
3 6 8
75487560
4 5 3
7561+
7562+
Clips using a specific lower threshold per column element, with missing values:
7563+
7564+
>>> t = pd.Series([2, -4, np.NaN, 6, 3])
7565+
>>> t
7566+
0 2.0
7567+
1 -4.0
7568+
2 NaN
7569+
3 6.0
7570+
4 3.0
7571+
dtype: float64
7572+
7573+
>>> df.clip(t, axis=0)
7574+
col_0 col_1
7575+
0 9 2
7576+
1 -3 -4
7577+
2 0 6
7578+
3 6 8
7579+
4 5 3
75497580
"""
75507581
inplace = validate_bool_kwarg(inplace, "inplace")
75517582

@@ -7558,9 +7589,17 @@ def clip(
75587589
# so ignore
75597590
# GH 19992
75607591
# numpy doesn't drop a list-like bound containing NaN
7561-
if not is_list_like(lower) and np.any(isna(lower)):
7592+
isna_lower = isna(lower)
7593+
if not is_list_like(lower):
7594+
if np.any(isna_lower):
7595+
lower = None
7596+
elif np.all(isna_lower):
75627597
lower = None
7563-
if not is_list_like(upper) and np.any(isna(upper)):
7598+
isna_upper = isna(upper)
7599+
if not is_list_like(upper):
7600+
if np.any(isna_upper):
7601+
upper = None
7602+
elif np.all(isna_upper):
75647603
upper = None
75657604

75667605
# GH 2747 (arguments were reversed)

pandas/tests/frame/methods/test_clip.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -144,17 +144,25 @@ def test_clip_with_na_args(self, float_frame):
144144
tm.assert_frame_equal(float_frame.clip(np.nan), float_frame)
145145
tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), float_frame)
146146

147-
# GH#19992
147+
# GH#19992 and adjusted in GH#40420
148148
df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]})
149149

150150
result = df.clip(lower=[4, 5, np.nan], axis=0)
151151
expected = DataFrame(
152-
{"col_0": [4, 5, np.nan], "col_1": [4, 5, np.nan], "col_2": [7, 8, np.nan]}
152+
{"col_0": [4, 5, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}
153153
)
154154
tm.assert_frame_equal(result, expected)
155155

156156
result = df.clip(lower=[4, 5, np.nan], axis=1)
157157
expected = DataFrame(
158-
{"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [np.nan, np.nan, np.nan]}
158+
{"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [7, 8, 9]}
159159
)
160160
tm.assert_frame_equal(result, expected)
161+
162+
# GH#40420
163+
data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]}
164+
df = DataFrame(data)
165+
t = Series([2, -4, np.NaN, 6, 3])
166+
result = df.clip(lower=t, axis=0)
167+
expected = DataFrame({"col_0": [9, -3, 0, 6, 5], "col_1": [2, -4, 6, 8, 3]})
168+
tm.assert_frame_equal(result, expected)

pandas/tests/series/methods/test_clip.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,13 @@ def test_clip_with_na_args(self):
4949
tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3]))
5050

5151
# GH#19992
52-
tm.assert_series_equal(s.clip(lower=[0, 4, np.nan]), Series([1, 4, np.nan]))
53-
tm.assert_series_equal(s.clip(upper=[1, np.nan, 1]), Series([1, np.nan, 1]))
52+
tm.assert_series_equal(s.clip(lower=[0, 4, np.nan]), Series([1, 4, 3]))
53+
tm.assert_series_equal(s.clip(upper=[1, np.nan, 1]), Series([1, 2, 1]))
54+
55+
# GH#40420
56+
s = Series([1, 2, 3])
57+
result = s.clip(0, [np.nan, np.nan, np.nan])
58+
tm.assert_series_equal(s, result)
5459

5560
def test_clip_against_series(self):
5661
# GH#6966

0 commit comments

Comments
 (0)