diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 34269185bccd6..34d833bb52d99 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -910,6 +910,7 @@ Other - Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`) - Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`) - Bug in :meth:`DataFrame.convert_dtypes` incorrectly raised ValueError when called on an empty DataFrame (:issue:`40393`) +- Bug in :meth:`DataFrame.clip` not interpreting missing values as no threshold (:issue:`40420`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d69e933164118..c77a3717c4c03 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7341,8 +7341,6 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): return self._clip_with_scalar(None, threshold, inplace=inplace) return self._clip_with_scalar(threshold, None, inplace=inplace) - subset = method(threshold, axis=axis) | isna(self) - # GH #15390 # In order for where method to work, the threshold must # be transformed to NDFrame from other array like structure. @@ -7351,6 +7349,18 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): threshold = self._constructor(threshold, index=self.index) else: threshold = align_method_FRAME(self, threshold, axis, flex=None)[1] + + # GH 40420 + # Treat missing thresholds as no bounds, not clipping the values + if is_list_like(threshold): + fill_value = np.inf if method.__name__ == "le" else -np.inf + threshold_inf = threshold.fillna(fill_value) + else: + threshold_inf = threshold + + subset = method(threshold_inf, axis=axis) | isna(self) + + # GH 40420 return self.where(subset, threshold, axis=axis, inplace=inplace) @overload @@ -7482,10 +7492,12 @@ def clip( ---------- lower : float or array_like, default None Minimum threshold value. All values below this - threshold will be set to it. + threshold will be set to it. A missing + threshold (e.g `NA`) will not clip the value. upper : float or array_like, default None Maximum threshold value. All values above this - threshold will be set to it. + threshold will be set to it. A missing + threshold (e.g `NA`) will not clip the value. axis : int or str axis name, optional Align object with lower and upper along the given axis. inplace : bool, default False @@ -7546,6 +7558,25 @@ def clip( 2 0 3 3 6 8 4 5 3 + + Clips using specific lower threshold per column element, with missing values: + + >>> t = pd.Series([2, -4, np.NaN, 6, 3]) + >>> t + 0 2.0 + 1 -4.0 + 2 NaN + 3 6.0 + 4 3.0 + dtype: float64 + + >>> df.clip(t, axis=0) + col_0 col_1 + 0 9 2 + 1 -3 -4 + 2 0 6 + 3 6 8 + 4 5 3 """ inplace = validate_bool_kwarg(inplace, "inplace") @@ -7558,9 +7589,17 @@ def clip( # so ignore # GH 19992 # numpy doesn't drop a list-like bound containing NaN - if not is_list_like(lower) and np.any(isna(lower)): + isna_lower = isna(lower) + if not is_list_like(lower): + if np.any(isna_lower): + lower = None + elif np.all(isna_lower): lower = None - if not is_list_like(upper) and np.any(isna(upper)): + isna_upper = isna(upper) + if not is_list_like(upper): + if np.any(isna_upper): + upper = None + elif np.all(isna_upper): upper = None # GH 2747 (arguments were reversed) diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py index 8a2374a414482..6525109da4394 100644 --- a/pandas/tests/frame/methods/test_clip.py +++ b/pandas/tests/frame/methods/test_clip.py @@ -144,17 +144,25 @@ def test_clip_with_na_args(self, float_frame): tm.assert_frame_equal(float_frame.clip(np.nan), float_frame) tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), float_frame) - # GH#19992 + # GH#19992 and adjusted in GH#40420 df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}) result = df.clip(lower=[4, 5, np.nan], axis=0) expected = DataFrame( - {"col_0": [4, 5, np.nan], "col_1": [4, 5, np.nan], "col_2": [7, 8, np.nan]} + {"col_0": [4, 5, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]} ) tm.assert_frame_equal(result, expected) result = df.clip(lower=[4, 5, np.nan], axis=1) expected = DataFrame( - {"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [np.nan, np.nan, np.nan]} + {"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [7, 8, 9]} ) tm.assert_frame_equal(result, expected) + + # GH#40420 + data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]} + df = DataFrame(data) + t = Series([2, -4, np.NaN, 6, 3]) + result = df.clip(lower=t, axis=0) + expected = DataFrame({"col_0": [9, -3, 0, 6, 5], "col_1": [2, -4, 6, 8, 3]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index 528e95f65c8f4..442718d677101 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -49,8 +49,13 @@ def test_clip_with_na_args(self): tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3])) # GH#19992 - tm.assert_series_equal(s.clip(lower=[0, 4, np.nan]), Series([1, 4, np.nan])) - tm.assert_series_equal(s.clip(upper=[1, np.nan, 1]), Series([1, np.nan, 1])) + tm.assert_series_equal(s.clip(lower=[0, 4, np.nan]), Series([1, 4, 3])) + tm.assert_series_equal(s.clip(upper=[1, np.nan, 1]), Series([1, 2, 1])) + + # GH#40420 + s = Series([1, 2, 3]) + result = s.clip(0, [np.nan, np.nan, np.nan]) + tm.assert_series_equal(s, result) def test_clip_against_series(self): # GH#6966