From 31f856735e93d86587906097ce3d06b8c30479f6 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Thu, 27 Dec 2018 16:16:32 -0500 Subject: [PATCH 1/6] use self.where method instead of numpy where to reuse casting logics --- pandas/core/generic.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 65b219cc57f3a..56fc61421f8e7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7132,28 +7132,23 @@ def _clip_with_scalar(self, lower, upper, inplace=False): (upper is not None and np.any(isna(upper)))): raise ValueError("Cannot use an NA value as a clip threshold") - result = self.values + result = self mask = isna(result) - with np.errstate(all='ignore'): - if upper is not None: - result = np.where(result >= upper, upper, result) - if lower is not None: - result = np.where(result <= lower, lower, result) + if upper is not None: + subset = self.le(upper, axis=None) | isna(result) + result = self.where(subset, upper, axis=None, inplace=inplace) + if lower is not None: + subset = self.ge(lower, axis=None) | isna(result) + result = self.where(subset, lower, axis=None, inplace=inplace) + if np.any(mask): result[mask] = np.nan - axes_dict = self._construct_axes_dict() - result = self._constructor(result, **axes_dict).__finalize__(self) - - if inplace: - self._update_inplace(result) - else: - return result + return result def _clip_with_one_bound(self, threshold, method, axis, inplace): - inplace = validate_bool_kwarg(inplace, 'inplace') if axis is not None: axis = self._get_axis_number(axis) From 61bb0c869c42eb6799b3d892ce0c62d2be0d7088 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Thu, 27 Dec 2018 18:50:28 -0500 Subject: [PATCH 2/6] modify clip scalar --- pandas/core/generic.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 56fc61421f8e7..6794d0b37653a 100644 --- a/pandas/core/generic.py +++ 
b/pandas/core/generic.py @@ -7133,17 +7133,15 @@ def _clip_with_scalar(self, lower, upper, inplace=False): raise ValueError("Cannot use an NA value as a clip threshold") result = self - mask = isna(result) if upper is not None: subset = self.le(upper, axis=None) | isna(result) - result = self.where(subset, upper, axis=None, inplace=inplace) + result = result.where(subset, upper, axis=None, inplace=inplace) if lower is not None: + if inplace: + result = self subset = self.ge(lower, axis=None) | isna(result) - result = self.where(subset, lower, axis=None, inplace=inplace) - - if np.any(mask): - result[mask] = np.nan + result = result.where(subset, lower, axis=None, inplace=inplace) return result From 723594081652ebfe739d42e342f596c454fcadfd Mon Sep 17 00:00:00 2001 From: Ming Li Date: Thu, 27 Dec 2018 18:57:48 -0500 Subject: [PATCH 3/6] modify expected case --- pandas/tests/frame/test_analytics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 88262220015c7..12cbcff4ffc79 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1895,7 +1895,8 @@ def test_clip_mixed_numeric(self): df = DataFrame({'A': [1, 2, 3], 'B': [1., np.nan, 3.]}) result = df.clip(1, 2) - expected = DataFrame({'A': [1, 2, 2.], + # GH 24162, clipping now preserves types + expected = DataFrame({'A': [1, 2, 2], 'B': [1., np.nan, 2.]}) tm.assert_frame_equal(result, expected, check_like=True) From b5a61215a2d4638aee88c0e291b09a3730969d37 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Fri, 28 Dec 2018 12:20:55 -0500 Subject: [PATCH 4/6] use existing handling of inplace --- pandas/core/generic.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6794d0b37653a..efb3f20202c42 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7133,17 +7133,17 @@ def 
_clip_with_scalar(self, lower, upper, inplace=False): raise ValueError("Cannot use an NA value as a clip threshold") result = self - if upper is not None: subset = self.le(upper, axis=None) | isna(result) - result = result.where(subset, upper, axis=None, inplace=inplace) + result = result.where(subset, upper, axis=None, inplace=False) if lower is not None: - if inplace: - result = self subset = self.ge(lower, axis=None) | isna(result) - result = result.where(subset, lower, axis=None, inplace=inplace) + result = result.where(subset, lower, axis=None, inplace=False) - return result + if inplace: + self._update_inplace(result) + else: + return result def _clip_with_one_bound(self, threshold, method, axis, inplace): From 45bd47800d70de0e2bf170043eeeb2630678fea9 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Fri, 28 Dec 2018 12:40:54 -0500 Subject: [PATCH 5/6] add test case --- pandas/tests/frame/test_analytics.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 12cbcff4ffc79..baf763d7b1d03 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1836,7 +1836,6 @@ def test_pct_change(self): tm.assert_frame_equal(result, expected) # Clip - def test_clip(self, float_frame): median = float_frame.median().median() original = float_frame.copy() @@ -1895,11 +1894,17 @@ def test_clip_mixed_numeric(self): df = DataFrame({'A': [1, 2, 3], 'B': [1., np.nan, 3.]}) result = df.clip(1, 2) - # GH 24162, clipping now preserves types expected = DataFrame({'A': [1, 2, 2], 'B': [1., np.nan, 2.]}) tm.assert_frame_equal(result, expected, check_like=True) + # GH 24162, clipping now preserves numeric types per column + df = DataFrame([[1, 2, 3.4], [3, 4, 5.6]], + columns=['foo', 'bar', 'baz']) + expected = df.dtypes + result = df.clip(upper=3).dtypes + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("inplace", [True, False]) def 
test_clip_against_series(self, inplace): # GH 6966 From fab5c99926bf0ddad178cd530d493d0344719e83 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Fri, 28 Dec 2018 12:55:35 -0500 Subject: [PATCH 6/6] update whatsnew --- doc/source/whatsnew/v0.24.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index d8a204abcd93e..1eae6d43677f1 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1405,6 +1405,7 @@ Conversion ^^^^^^^^^^ - Bug in :meth:`DataFrame.combine_first` in which column types were unexpectedly converted to float (:issue:`20699`) +- Bug in :meth:`DataFrame.clip` in which column types were not preserved and were cast to float (:issue:`24162`) Strings ^^^^^^^