From 89bfe1da633c73d0361bda5ad68c71ce598b87e1 Mon Sep 17 00:00:00 2001 From: Waltteri Koskinen <45742869+Rasori@users.noreply.github.com> Date: Sat, 24 Apr 2021 18:36:31 +0300 Subject: [PATCH 1/4] Fix series clipping NA issue The Series clipping method could not handle Series with NA values. The issue was fixed by comparing the values first and only then converting the result to a NumPy array. --- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index eba4a36315ba4..db909ef0a6822 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7404,10 +7404,10 @@ def _clip_with_scalar(self, lower, upper, inplace: bool_t = False): with np.errstate(all="ignore"): if upper is not None: - subset = self.to_numpy() <= upper + subset = (self <= upper).to_numpy() result = result.where(subset, upper, axis=None, inplace=False) if lower is not None: - subset = self.to_numpy() >= lower + subset = (self >= lower).to_numpy() result = result.where(subset, lower, axis=None, inplace=False) if np.any(mask): From b621d79842f77ab7385628a05047c2bb10d9e877 Mon Sep 17 00:00:00 2001 From: Waltteri Koskinen <45742869+Rasori@users.noreply.github.com> Date: Tue, 27 Apr 2021 19:37:54 +0300 Subject: [PATCH 2/4] Add test to ensure series clipping method can handle NA values --- pandas/tests/series/methods/test_clip.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index 442718d677101..c011af3d99ccb 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -40,6 +40,21 @@ def test_clip_types_and_nulls(self): assert list(isna(s)) == list(isna(lower)) assert list(isna(s)) == list(isna(upper)) + @pytest.mark.parametrize("dtypes", ["Float64", "Int64", "Float32", "Int32"]) + def test_series_clipping_with_na_values(self, dtypes): + # Ensure that clipping method can handle NA 
values with out failing + # GH#40581 + + s = Series([pd.NA, 1.0, 3.0], dtype=dtypes) + s_clipped_upper = s.clip(upper=2.0) + s_clipped_lower = s.clip(lower=2.0) + + expected_upper = Series([pd.NA, 1.0, 2.0], dtype=dtypes) + expected_lower = Series([pd.NA, 2.0, 3.0], dtype=dtypes) + + tm.assert_series_equal(s_clipped_upper, expected_upper) + tm.assert_series_equal(s_clipped_lower, expected_lower) + def test_clip_with_na_args(self): """Should process np.nan argument as None """ # GH#17276 From f49cfd0c9cfe38d4fdd084eb0d6cbd13a4fd3221 Mon Sep 17 00:00:00 2001 From: Waltteri Koskinen <45742869+Rasori@users.noreply.github.com> Date: Tue, 27 Apr 2021 19:54:29 +0300 Subject: [PATCH 3/4] Add whatsnew entry --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 74710ca48308c..0c1c835e2b70c 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -696,6 +696,7 @@ Numeric - Bug in :meth:`DataFrame.transform` would raise ``SpecificationError`` when passed a dictionary and columns were missing; will now raise a ``KeyError`` instead (:issue:`40004`) - Bug in :meth:`DataFrameGroupBy.rank` giving incorrect results with ``pct=True`` and equal values between consecutive groups (:issue:`40518`) - Bug in :meth:`Series.count` would result in an ``int32`` result on 32-bit platforms when argument ``level=None`` (:issue:`40908`) +- Bug in :meth:`Series.clip` where it would fail if the Series contains NA values and has a nullable integer or float dtype (:issue:`40851`) Conversion ^^^^^^^^^^ From 47d4a8249f0968ec44f82594f3b2c1779a3ece22 Mon Sep 17 00:00:00 2001 From: Waltteri Koskinen <45742869+Rasori@users.noreply.github.com> Date: Wed, 28 Apr 2021 18:38:06 +0300 Subject: [PATCH 4/4] Parametrize test using fixtures for dtypes and null values. 
--- pandas/tests/series/methods/test_clip.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index c011af3d99ccb..6185fe6c54fa4 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -40,17 +40,22 @@ def test_clip_types_and_nulls(self): assert list(isna(s)) == list(isna(lower)) assert list(isna(s)) == list(isna(upper)) - @pytest.mark.parametrize("dtypes", ["Float64", "Int64", "Float32", "Int32"]) - def test_series_clipping_with_na_values(self, dtypes): + def test_series_clipping_with_na_values( + self, any_nullable_numeric_dtype, nulls_fixture + ): # Ensure that clipping method can handle NA values with out failing # GH#40581 - s = Series([pd.NA, 1.0, 3.0], dtype=dtypes) + s = Series([nulls_fixture, 1.0, 3.0], dtype=any_nullable_numeric_dtype) s_clipped_upper = s.clip(upper=2.0) s_clipped_lower = s.clip(lower=2.0) - expected_upper = Series([pd.NA, 1.0, 2.0], dtype=dtypes) - expected_lower = Series([pd.NA, 2.0, 3.0], dtype=dtypes) + expected_upper = Series( + [nulls_fixture, 1.0, 2.0], dtype=any_nullable_numeric_dtype + ) + expected_lower = Series( + [nulls_fixture, 2.0, 3.0], dtype=any_nullable_numeric_dtype + ) tm.assert_series_equal(s_clipped_upper, expected_upper) tm.assert_series_equal(s_clipped_lower, expected_lower)