From 5ef4ebaa18823df7d82a85091455b9d46cad1ecf Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Wed, 13 May 2020 22:36:38 +0200 Subject: [PATCH 1/9] all tests working --- pandas/core/generic.py | 20 +++++++++++-------- .../tests/frame/methods/test_interpolate.py | 6 ++++-- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 56c868c7bf01e..65e1960fdd668 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6881,30 +6881,34 @@ def interpolate( inplace = validate_bool_kwarg(inplace, "inplace") axis = self._get_axis_number(axis) + index = self._get_axis(axis) - if axis == 0: - df = self - else: - df = self.T - - if isinstance(df.index, MultiIndex) and method != "linear": + if isinstance(self.index, MultiIndex) and method != "linear": raise ValueError( "Only `method=linear` interpolation is supported on MultiIndexes." ) - if df.ndim == 2 and np.all(df.dtypes == np.dtype(object)): + if self.ndim == 2 and np.all(self.dtypes == np.dtype(object)): raise TypeError( "Cannot interpolate with all object-dtype columns " "in the DataFrame. Try setting at least one " "column to a numeric dtype." 
) + + if method in ['backfill', 'bfill', 'pad', 'ffill']: + return self.fillna(method=method, axis=axis, inplace=inplace, limit=limit, downcast=downcast) # create/use the index + + if axis==0: + df = self + else: + df = self.T + if method == "linear": # prior default index = np.arange(len(df.index)) else: - index = df.index methods = {"index", "values", "nearest", "time"} is_numeric_or_datetime = ( is_numeric_dtype(index) diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 3b8fa0dfbb603..eb0062819b3d3 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -202,7 +202,8 @@ def test_interp_leading_nans(self, check_scipy): result = df.interpolate(method="polynomial", order=1) tm.assert_frame_equal(result, expected) - def test_interp_raise_on_only_mixed(self): + @pytest.mark.parametrize('axis', [0, 1]) + def test_interp_raise_on_only_mixed(self, axis): df = DataFrame( { "A": [1, 2, np.nan, 4], @@ -213,7 +214,7 @@ def test_interp_raise_on_only_mixed(self): } ) with pytest.raises(TypeError): - df.interpolate(axis=1) + df.astype('object').interpolate(axis=axis) def test_interp_raise_on_all_object_dtype(self): # GH 22985 @@ -284,3 +285,4 @@ def test_interp_time_inplace_axis(self, axis): result = expected.interpolate(axis=0, method="time") expected.interpolate(axis=0, method="time", inplace=True) tm.assert_frame_equal(result, expected) + From 4f3bcbc44369e9eb20291530d7d82fee4fc7946f Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Wed, 13 May 2020 23:16:12 +0200 Subject: [PATCH 2/9] add tests --- .../tests/frame/methods/test_interpolate.py | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index eb0062819b3d3..16e9f46051861 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py 
@@ -286,3 +286,45 @@ def test_interp_time_inplace_axis(self, axis): expected.interpolate(axis=0, method="time", inplace=True) tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("axis", [0, 1]) + def test_interp_ffill(self, axis): + # GH 33956 + df = DataFrame( + { + "A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0], + "B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0], + "C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0], + } + ) + expected = df.ffill(axis=axis) + result = df.interpolate(method="ffill", axis=axis) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("axis", [0, 1]) + def test_interp_bfill(self, axis): + # GH 33956 + df = DataFrame( + { + "A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0], + "B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0], + "C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0], + } + ) + expected = df.bfill(axis=axis) + result = df.interpolate(method="bfill", axis=axis) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("axis", [0, 1]) + def test_interp_pad(self, axis): + # GH 33956 + df = DataFrame( + { + "A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0], + "B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0], + "C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0], + } + ) + expected = df.fillna(method='pad', axis=axis) + result = df.interpolate(method="pad", axis=axis) + tm.assert_frame_equal(result, expected) From fda8be0326e3d9d842683432bc41e46b04b98903 Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Wed, 13 May 2020 23:24:34 +0200 Subject: [PATCH 3/9] formatting with black pandas --- pandas/core/generic.py | 13 +++++++++---- pandas/tests/frame/methods/test_interpolate.py | 7 +++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 65e1960fdd668..fffc96162443a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6895,12 +6895,17 @@ def interpolate( "column to a numeric dtype." 
) - - if method in ['backfill', 'bfill', 'pad', 'ffill']: - return self.fillna(method=method, axis=axis, inplace=inplace, limit=limit, downcast=downcast) + if method in ["backfill", "bfill", "pad", "ffill"]: + return self.fillna( + method=method, + axis=axis, + inplace=inplace, + limit=limit, + downcast=downcast, + ) # create/use the index - if axis==0: + if axis == 0: df = self else: df = self.T diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 16e9f46051861..77f355e9f50a2 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -202,7 +202,7 @@ def test_interp_leading_nans(self, check_scipy): result = df.interpolate(method="polynomial", order=1) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize('axis', [0, 1]) + @pytest.mark.parametrize("axis", [0, 1]) def test_interp_raise_on_only_mixed(self, axis): df = DataFrame( { @@ -214,7 +214,7 @@ def test_interp_raise_on_only_mixed(self, axis): } ) with pytest.raises(TypeError): - df.astype('object').interpolate(axis=axis) + df.astype("object").interpolate(axis=axis) def test_interp_raise_on_all_object_dtype(self): # GH 22985 @@ -286,7 +286,6 @@ def test_interp_time_inplace_axis(self, axis): expected.interpolate(axis=0, method="time", inplace=True) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("axis", [0, 1]) def test_interp_ffill(self, axis): # GH 33956 @@ -325,6 +324,6 @@ def test_interp_pad(self, axis): "C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0], } ) - expected = df.fillna(method='pad', axis=axis) + expected = df.fillna(method="pad", axis=axis) result = df.interpolate(method="pad", axis=axis) tm.assert_frame_equal(result, expected) From 5b21ba59d3d5a6370e7aa8ee5ddb0270b465dcd2 Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Thu, 14 May 2020 18:35:29 +0200 Subject: [PATCH 4/9] change order of raising TypeError and calling fillna functions to be consistens 
with previous version, add TODO --- pandas/core/generic.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 44ef2b1e18980..14b6df6efc4f8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6888,13 +6888,6 @@ def interpolate( "Only `method=linear` interpolation is supported on MultiIndexes." ) - if self.ndim == 2 and np.all(self.dtypes == np.dtype(object)): - raise TypeError( - "Cannot interpolate with all object-dtype columns " - "in the DataFrame. Try setting at least one " - "column to a numeric dtype." - ) - if method in ["backfill", "bfill", "pad", "ffill"]: return self.fillna( method=method, @@ -6903,13 +6896,21 @@ def interpolate( limit=limit, downcast=downcast, ) - # create/use the index + # todo: change interpolation so that no transposing is necessary + # Currently we need this to call the axis correctly inside the various interpolation methods if axis == 0: df = self else: df = self.T + if self.ndim == 2 and np.all(self.dtypes == np.dtype(object)): + raise TypeError( + "Cannot interpolate with all object-dtype columns " + "in the DataFrame. Try setting at least one " + "column to a numeric dtype." 
+ ) + if method == "linear": # prior default index = np.arange(len(df.index)) From 1280b32d6a2559c00c9e415de0d1707233535e7e Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Thu, 14 May 2020 20:25:30 +0200 Subject: [PATCH 5/9] fix line length for linting --- pandas/core/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 14b6df6efc4f8..d67689d58e55a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6898,7 +6898,8 @@ def interpolate( ) # todo: change interpolation so that no transposing is necessary - # Currently we need this to call the axis correctly inside the various interpolation methods + # Currently we need this to call the axis correctly inside the various + # interpolation methods if axis == 0: df = self else: From 01fec55ca568dae2be31432ee265df713140d411 Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Sun, 17 May 2020 17:51:45 +0200 Subject: [PATCH 6/9] add comments for fillna methods and use parametrize for tests --- pandas/core/generic.py | 2 ++ .../tests/frame/methods/test_interpolate.py | 35 +++---------------- 2 files changed, 6 insertions(+), 31 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d67689d58e55a..61b166299a8a8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6888,6 +6888,8 @@ def interpolate( "Only `method=linear` interpolation is supported on MultiIndexes." 
) + # for the methods backfill, bfill, pad, ffill limit_direction and limit_area + # are being ignored, see #26796 for more information if method in ["backfill", "bfill", "pad", "ffill"]: return self.fillna( method=method, diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 4bc3ca7ed6f05..8987ad517ec29 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -298,8 +298,9 @@ def test_interp_string_axis(self, axis_name, axis_number): expected = df.interpolate(method="linear", axis=axis_number) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("method", ["ffill", "bfill", "pad"]) @pytest.mark.parametrize("axis", [0, 1]) - def test_interp_ffill(self, axis): + def test_interp_fillna_methods(self, axis, method): # GH 33956 df = DataFrame( { @@ -308,34 +309,6 @@ def test_interp_ffill(self, axis): "C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0], } ) - expected = df.ffill(axis=axis) - result = df.interpolate(method="ffill", axis=axis) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("axis", [0, 1]) - def test_interp_bfill(self, axis): - # GH 33956 - df = DataFrame( - { - "A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0], - "B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0], - "C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0], - } - ) - expected = df.bfill(axis=axis) - result = df.interpolate(method="bfill", axis=axis) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("axis", [0, 1]) - def test_interp_pad(self, axis): - # GH 33956 - df = DataFrame( - { - "A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0], - "B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0], - "C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0], - } - ) - expected = df.fillna(method="pad", axis=axis) - result = df.interpolate(method="pad", axis=axis) + expected = df.fillna(axis=axis, method=method) + result = df.interpolate(method=method, axis=axis) tm.assert_frame_equal(result, expected) From 
baeae0ce4cdb73a4011a6aada4ec32fd7174da78 Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Sun, 17 May 2020 23:40:37 +0200 Subject: [PATCH 7/9] added whatsnew entry --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5c74965bffdd7..8f90e95ce8f06 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -337,6 +337,7 @@ Backwards incompatible API changes - Combining a ``Categorical`` with integer categories and which contains missing values with a float dtype column in operations such as :func:`concat` or :meth:`~DataFrame.append` will now result in a float column instead of an object dtyped column (:issue:`33607`) +- :meth:`DataFrame.interpolate` uses the correct axis convention. Previously interpolating along columns lead to interpolation along indices and vice versa. ``MultiIndex.get_indexer`` interprets `method` argument differently ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 502f82701b286e7021bb9a12655417d0742f9180 Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Thu, 28 May 2020 00:17:51 +0200 Subject: [PATCH 8/9] update whatsnew, add PR comment to generic.py, use fixtures in test_interpolate and check message --- doc/source/whatsnew/v1.1.0.rst | 4 ++-- pandas/core/generic.py | 3 +-- pandas/tests/frame/methods/test_interpolate.py | 12 +++++++----- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 8f90e95ce8f06..219a6a7552eca 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -337,7 +337,6 @@ Backwards incompatible API changes - Combining a ``Categorical`` with integer categories and which contains missing values with a float dtype column in operations such as :func:`concat` or :meth:`~DataFrame.append` will now result in a float column instead of an object dtyped column (:issue:`33607`) 
-- :meth:`DataFrame.interpolate` uses the correct axis convention. Previously interpolating along columns lead to interpolation along indices and vice versa. ``MultiIndex.get_indexer`` interprets `method` argument differently ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -734,7 +733,8 @@ Missing - Bug in :meth:`replace` when argument ``to_replace`` is of type dict/list and is used on a :class:`Series` containing ```` was raising a ``TypeError``. The method now handles this by ignoring ```` values when doing the comparison for the replacement (:issue:`32621`) - Bug in :meth:`~Series.any` and :meth:`~Series.all` incorrectly returning ```` for all ``False`` or all ``True`` values using the nulllable boolean dtype and with ``skipna=False`` (:issue:`33253`) - Clarified documentation on interpolate with method =akima. The ``der`` parameter must be scalar or None (:issue:`33426`) - +- :meth:`DataFrame.interpolate` uses the correct axis convention now. Previously interpolating along columns led to interpolation along indices and vice versa. Furthermore interpolating with methods ``pad``, ``ffill``, ``bfill`` and ``backfill`` is identical to using these methods with :meth:`fillna` (:issue:`12918`, :issue:`29146`) +- Bug in :meth:`DataFrame.interpolate` when called on a DataFrame with column names of string type was throwing a ValueError. The method is now independent of the type of column names (:issue:`33956`) MultiIndex ^^^^^^^^^^ - Bug in :meth:`Dataframe.loc` when used with a :class:`MultiIndex`. 
The returned values were not in the same order as the given inputs (:issue:`22797`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 61b166299a8a8..e9f7bf457cbfd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6889,7 +6889,7 @@ def interpolate( ) # for the methods backfill, bfill, pad, ffill limit_direction and limit_area - # are being ignored, see #26796 for more information + # are being ignored, see gh-26796 for more information if method in ["backfill", "bfill", "pad", "ffill"]: return self.fillna( method=method, @@ -6899,7 +6899,6 @@ def interpolate( downcast=downcast, ) - # todo: change interpolation so that no transposing is necessary # Currently we need this to call the axis correctly inside the various # interpolation methods if axis == 0: diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 8987ad517ec29..efb3d719016bb 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -202,7 +202,6 @@ def test_interp_leading_nans(self, check_scipy): result = df.interpolate(method="polynomial", order=1) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("axis", [0, 1]) def test_interp_raise_on_only_mixed(self, axis): df = DataFrame( { @@ -213,7 +212,12 @@ def test_interp_raise_on_only_mixed(self, axis): "E": [1, 2, 3, 4], } ) - with pytest.raises(TypeError): + msg = ( + "Cannot interpolate with all object-dtype columns " + "in the DataFrame. Try setting at least one " + "column to a numeric dtype." 
+ ) + with pytest.raises(TypeError, match=msg): df.astype("object").interpolate(axis=axis) def test_interp_raise_on_all_object_dtype(self): @@ -273,7 +277,6 @@ def test_interp_ignore_all_good(self): result = df[["B", "D"]].interpolate(downcast=None) tm.assert_frame_equal(result, df[["B", "D"]]) - @pytest.mark.parametrize("axis", [0, 1]) def test_interp_time_inplace_axis(self, axis): # GH 9687 periods = 5 @@ -299,9 +302,8 @@ def test_interp_string_axis(self, axis_name, axis_number): tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("method", ["ffill", "bfill", "pad"]) - @pytest.mark.parametrize("axis", [0, 1]) def test_interp_fillna_methods(self, axis, method): - # GH 33956 + # GH 12918 df = DataFrame( { "A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0], From 42895db0e585a6274f45ef41b298c6d597e0e61e Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Fri, 29 May 2020 18:25:32 +0200 Subject: [PATCH 9/9] new line in v1.1.0.rst --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 219a6a7552eca..dd6ca8a0c969a 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -735,6 +735,7 @@ Missing - Clarified documentation on interpolate with method =akima. The ``der`` parameter must be scalar or None (:issue:`33426`) - :meth:`DataFrame.interpolate` uses the correct axis convention now. Previously interpolating along columns led to interpolation along indices and vice versa. Furthermore interpolating with methods ``pad``, ``ffill``, ``bfill`` and ``backfill`` is identical to using these methods with :meth:`fillna` (:issue:`12918`, :issue:`29146`) - Bug in :meth:`DataFrame.interpolate` when called on a DataFrame with column names of string type was throwing a ValueError. The method is now independent of the type of column names (:issue:`33956`) + MultiIndex ^^^^^^^^^^ - Bug in :meth:`Dataframe.loc` when used with a :class:`MultiIndex`. 
The returned values were not in the same order as the given inputs (:issue:`22797`)