From 8eefbfdd0611650e1bb636fcff33a899a14b58fe Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 21 Jun 2022 20:09:33 +0200 Subject: [PATCH 1/5] Backport PR #47327: REGR: Fix fillna making a copy when dict was given as fill value and inplace is set --- doc/source/whatsnew/v1.4.3.rst | 1 + pandas/core/frame.py | 2 +- pandas/core/generic.py | 4 +++- pandas/tests/frame/methods/test_fillna.py | 12 ++++++++++++ pandas/tests/frame/methods/test_update.py | 13 +++++++++++++ 5 files changed, 30 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst index a4d81533df23d..d031426a2abbf 100644 --- a/doc/source/whatsnew/v1.4.3.rst +++ b/doc/source/whatsnew/v1.4.3.rst @@ -18,6 +18,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.to_csv` raising error when :class:`DataFrame` contains extension dtype categorical column (:issue:`46297`, :issue:`46812`) - Fixed regression in representation of ``dtypes`` attribute of :class:`MultiIndex` (:issue:`46900`) - Fixed regression when setting values with :meth:`DataFrame.loc` updating :class:`RangeIndex` when index was set as new column and column was updated afterwards (:issue:`47128`) +- Fixed regression in :meth:`DataFrame.fillna` and :meth:`DataFrame.update` creating a copy when updating inplace (:issue:`47188`) - Fixed regression in :meth:`DataFrame.nsmallest` led to wrong results when ``np.nan`` in the sorting column (:issue:`46589`) - Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`) - Fixed regression in :func:`concat` not sorting columns for mixed column names (:issue:`47127`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 391c12905adae..5b25f5be01d29 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7594,7 +7594,7 @@ def update( if mask.all(): continue - self[col] = expressions.where(mask, this, that) + self.loc[:, col] = expressions.where(mask, this, that) # ---------------------------------------------------------------------- # Data reshaping diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d924093203d7e..6357a670e6ba6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6482,7 +6482,9 @@ def fillna( if k not in result: continue downcast_k = downcast if not is_dict else downcast.get(k) - result[k] = result[k].fillna(v, limit=limit, downcast=downcast_k) + result.loc[:, k] = result[k].fillna( + v, limit=limit, downcast=downcast_k + ) return result if not inplace else None elif not is_list_like(value): diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index f4957efcd228a..78671bbf8d5f1 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -265,6 +265,7 @@ def test_fillna_downcast_false(self, frame_or_series): result = obj.fillna("", downcast=False) tm.assert_equal(result, obj) + @td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize("columns", [["A", "A", "B"], ["A", "A"]]) def test_fillna_dictlike_value_duplicate_colnames(self, columns): # GH#43476 @@ -654,6 +655,17 @@ def test_fillna_inplace_with_columns_limit_and_value(self): df.fillna(axis=1, value=100, limit=1, inplace=True) tm.assert_frame_equal(df, expected) + @td.skip_array_manager_invalid_test + @pytest.mark.parametrize("val", [-1, {"x": -1, "y": -1}]) + def test_inplace_dict_update_view(self, val): + # GH#47188 + df = DataFrame({"x": [np.nan, 2], "y": [np.nan, 2]}) + result_view = df[:] + df.fillna(val, inplace=True) + expected = DataFrame({"x": [-1, 2.0], "y": [-1.0, 2]}) + tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(result_view, expected) + def test_fillna_nonconsolidated_frame(): # https://github.com/pandas-dev/pandas/issues/36495 diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index 408113e9bc417..d3257ac09a0ab 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( DataFrame, @@ -146,3 +148,14 @@ def test_update_with_different_dtype(self): expected = DataFrame({"a": [1, 3], "b": [np.nan, 2], "c": ["foo", np.nan]}) tm.assert_frame_equal(df, expected) + + @td.skip_array_manager_invalid_test + def test_update_modify_view(self): + # GH#47188 + df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]}) + df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]}) + result_view = df2[:] + df2.update(df) + expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]}) + tm.assert_frame_equal(df2, expected) + tm.assert_frame_equal(result_view, expected) From 51aedcc2bccf11a766fbf63c6e8d6344fb3dc1ce Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 21 Jun 2022 21:53:25 +0200 Subject: [PATCH 2/5] Remove decorator --- pandas/tests/frame/methods/test_fillna.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 78671bbf8d5f1..cb0d24a86c1bc 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -265,7 +265,6 @@ def test_fillna_downcast_false(self, frame_or_series): result = obj.fillna("", downcast=False) tm.assert_equal(result, obj) - @td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize("columns", [["A", "A", "B"], ["A", "A"]]) def test_fillna_dictlike_value_duplicate_colnames(self, columns): # GH#43476 From 55b1c7ba40ccf70cb928ef5af130d321c6375b7f Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 21 Jun 2022 22:19:36 +0200 Subject: [PATCH 3/5] Add specific xfail --- pandas/tests/frame/methods/test_fillna.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index cb0d24a86c1bc..ea90d6559b02f 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -266,8 +266,13 @@ def test_fillna_downcast_false(self, frame_or_series): tm.assert_equal(result, obj) @pytest.mark.parametrize("columns", [["A", "A", "B"], ["A", "A"]]) - def test_fillna_dictlike_value_duplicate_colnames(self, columns): + def test_fillna_dictlike_value_duplicate_colnames( + self, columns, using_array_manager + ): # GH#43476 + if using_array_manager and columns == ["A", "A", "B"]: + pytest.mark.xfail("Setting on duplicate columns not allowed") + df = DataFrame(np.nan, index=[0, 1], columns=columns) with tm.assert_produces_warning(None): result = df.fillna({"A": 0}) From f2e7568d908bae4a1b4453d83e10bcbeb25345ba Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 22 Jun 2022 22:53:40 +0100 Subject: [PATCH 4/5] just skip --- pandas/tests/frame/methods/test_fillna.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index ea90d6559b02f..1cac99524bc45 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -265,14 +265,9 @@ def test_fillna_downcast_false(self, frame_or_series): result = obj.fillna("", downcast=False) tm.assert_equal(result, obj) + @td.skip_array_manager_invalid_test @pytest.mark.parametrize("columns", [["A", "A", "B"], ["A", "A"]]) - def test_fillna_dictlike_value_duplicate_colnames( - self, columns, using_array_manager - ): - # GH#43476 - if using_array_manager and columns == ["A", "A", "B"]: - pytest.mark.xfail("Setting on duplicate columns not allowed") - + def test_fillna_dictlike_value_duplicate_colnames(self, columns): df = DataFrame(np.nan, index=[0, 1], columns=columns) with tm.assert_produces_warning(None): result = df.fillna({"A": 0}) From 6f39238e7c6f6102f2a6fa1f320e2a60bd9d8e23 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 22 Jun 2022 22:57:32 +0100 Subject: [PATCH 5/5] add back issue number for test_fillna_dictlike_value_duplicate_colnames --- pandas/tests/frame/methods/test_fillna.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 1cac99524bc45..33bd32ad65371 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -268,6 +268,7 @@ def test_fillna_downcast_false(self, frame_or_series): @td.skip_array_manager_invalid_test @pytest.mark.parametrize("columns", [["A", "A", "B"], ["A", "A"]]) def test_fillna_dictlike_value_duplicate_colnames(self, columns): + # GH#43476 df = DataFrame(np.nan, index=[0, 1], columns=columns) with tm.assert_produces_warning(None): result = df.fillna({"A": 0})