From ed0c2b0b85fef68a806026a300320f6929077db9 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 13 Jun 2022 10:55:22 +0200 Subject: [PATCH 1/9] REGR: Fix nan comparison for same Index object --- doc/source/whatsnew/v1.4.3.rst | 1 + pandas/core/generic.py | 4 +++- pandas/tests/frame/methods/test_fillna.py | 10 ++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst index ca8b8ca15ec47..5fecd226fba93 100644 --- a/doc/source/whatsnew/v1.4.3.rst +++ b/doc/source/whatsnew/v1.4.3.rst @@ -16,6 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`DataFrame.replace` when the replacement value was explicitly ``None`` when passed in a dictionary to ``to_replace`` also casting other columns to object dtype even when there were no values to replace (:issue:`46634`) - Fixed regression when setting values with :meth:`DataFrame.loc` updating :class:`RangeIndex` when index was set as new column and column was updated afterwards (:issue:`47128`) +- Fixed regression in :meth:`DataFrame.fillna` creating a copy when a dictionary was given as a fill value (:issue:`47188`) - Fixed regression in :meth:`DataFrame.nsmallest` led to wrong results when ``np.nan`` in the sorting column (:issue:`46589`) - Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`) - Fixed regression in :func:`concat` not sorting columns for mixed column names (:issue:`47127`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 673228a758aca..75a599debbc69 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6521,7 +6521,9 @@ def fillna( if k not in result: continue downcast_k = downcast if not is_dict else downcast.get(k) - result[k] = result[k].fillna(v, limit=limit, downcast=downcast_k) + result.loc[:, k] = result[k].fillna( + v, limit=limit, downcast=downcast_k + ) return result if not inplace else None elif not is_list_like(value): diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 5008e64dd0e99..3a1f525f04807 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -673,6 +673,16 @@ def test_fillna_inplace_with_columns_limit_and_value(self): df.fillna(axis=1, value=100, limit=1, inplace=True) tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("val", [-1, {"x": -1, "y": -1}]) + def test_inplace_dict_update_view(self, val): + # GH#47188 + df = DataFrame({"x": [np.nan, 2], "y": [np.nan, 2]}) + result_view = df["x"] + df.fillna(val, inplace=True) + expected = DataFrame({"x": [-1, 2.0], "y": [-1.0, 2]}) + tm.assert_frame_equal(df, expected) + tm.assert_series_equal(result_view, expected["x"]) + def test_fillna_nonconsolidated_frame(): # https://github.com/pandas-dev/pandas/issues/36495 From c3c8e33ce9ca4ded79fceb9764c207e551ee0618 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 18 Jun 2022 22:52:12 +0200 Subject: [PATCH 2/9] Return nbs immediately --- pandas/core/internals/blocks.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 49efecec7472e..a06830019a9b1 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1183,6 +1183,9 @@ def fillna( # test_fillna_dtype_conversion_equiv_replace nbs = self.where(value, ~mask.T, _downcast=False) + if inplace: + return nbs + # Note: blk._maybe_downcast vs self._maybe_downcast(nbs) # makes a difference bc blk may have object dtype, which has # different behavior in _maybe_downcast. From a70a92863d5812cc41bd4417a355cfff7a9c9437 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 20 Jun 2022 22:01:59 +0200 Subject: [PATCH 3/9] Try view on whole frame --- pandas/tests/frame/methods/test_fillna.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 3a1f525f04807..c272107003b1c 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -677,11 +677,11 @@ def test_fillna_inplace_with_columns_limit_and_value(self): def test_inplace_dict_update_view(self, val): # GH#47188 df = DataFrame({"x": [np.nan, 2], "y": [np.nan, 2]}) - result_view = df["x"] + result_view = df[:] df.fillna(val, inplace=True) expected = DataFrame({"x": [-1, 2.0], "y": [-1.0, 2]}) tm.assert_frame_equal(df, expected) - tm.assert_series_equal(result_view, expected["x"]) + tm.assert_frame_equal(result_view, expected) def test_fillna_nonconsolidated_frame(): From 72580ffbb0b051af71b3c8b1db32125adf9b77e8 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 20 Jun 2022 23:00:29 +0200 Subject: [PATCH 4/9] Skip for array manager --- pandas/tests/frame/methods/test_fillna.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index c272107003b1c..eb798fd4622dd 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -673,6 +673,7 @@ def test_fillna_inplace_with_columns_limit_and_value(self): df.fillna(axis=1, value=100, limit=1, inplace=True) tm.assert_frame_equal(df, expected) + @td.skip_array_manager_invalid_test @pytest.mark.parametrize("val", [-1, {"x": -1, "y": -1}]) def test_inplace_dict_update_view(self, val): # GH#47188 From df38c5192f571a1bff43dd9bdf1385e57d0e7caa Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 21 Jun 2022 00:08:08 +0200 Subject: [PATCH 5/9] Skip tests --- pandas/tests/frame/methods/test_fillna.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index eb798fd4622dd..4a526fd0cd24c 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -284,6 +284,7 @@ def test_fillna_downcast_noop(self, frame_or_series): res3 = obj2.fillna("foo", downcast=np.dtype(np.int32)) tm.assert_equal(res3, expected) + @td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize("columns", [["A", "A", "B"], ["A", "A"]]) def test_fillna_dictlike_value_duplicate_colnames(self, columns): # GH#43476 @@ -673,7 +674,7 @@ def test_fillna_inplace_with_columns_limit_and_value(self): df.fillna(axis=1, value=100, limit=1, inplace=True) tm.assert_frame_equal(df, expected) - @td.skip_array_manager_invalid_test + @td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize("val", [-1, {"x": -1, "y": -1}]) def test_inplace_dict_update_view(self, val): # GH#47188 From 7a4457af286b3253a0f865dd3878b8fee6ad3f06 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 21 Jun 2022 09:50:34 +0200 Subject: [PATCH 6/9] Skip tests --- pandas/tests/frame/methods/test_fillna.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 4a526fd0cd24c..f5c9dd65e4760 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -674,7 +674,7 @@ def test_fillna_inplace_with_columns_limit_and_value(self): df.fillna(axis=1, value=100, limit=1, inplace=True) tm.assert_frame_equal(df, expected) - @td.skip_array_manager_not_yet_implemented + @td.skip_array_manager_invalid_test @pytest.mark.parametrize("val", [-1, {"x": -1, "y": -1}]) def test_inplace_dict_update_view(self, val): # GH#47188 From f5feeabf92b3798eb3c95cc800f70c08eb8566c9 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 21 Jun 2022 12:13:04 +0200 Subject: [PATCH 7/9] Remove inplace return --- pandas/core/internals/blocks.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a06830019a9b1..49efecec7472e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1183,9 +1183,6 @@ def fillna( # test_fillna_dtype_conversion_equiv_replace nbs = self.where(value, ~mask.T, _downcast=False) - if inplace: - return nbs - # Note: blk._maybe_downcast vs self._maybe_downcast(nbs) # makes a difference bc blk may have object dtype, which has # different behavior in _maybe_downcast. From fc86283df5fc4b612dd98697388952463781108e Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 21 Jun 2022 12:29:30 +0200 Subject: [PATCH 8/9] Add other methods --- doc/source/whatsnew/v1.4.3.rst | 2 +- pandas/core/computation/eval.py | 2 +- pandas/core/frame.py | 2 +- pandas/tests/frame/methods/test_update.py | 13 +++++++++++++ pandas/tests/frame/test_query_eval.py | 10 ++++++++++ 5 files changed, 26 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst index 28b8da78fcbfb..f1e1c798b28f6 100644 --- a/doc/source/whatsnew/v1.4.3.rst +++ b/doc/source/whatsnew/v1.4.3.rst @@ -18,7 +18,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.to_csv` raising error when :class:`DataFrame` contains extension dtype categorical column (:issue:`46297`, :issue:`46812`) - Fixed regression in representation of ``dtypes`` attribute of :class:`MultiIndex` (:issue:`46900`) - Fixed regression when setting values with :meth:`DataFrame.loc` updating :class:`RangeIndex` when index was set as new column and column was updated afterwards (:issue:`47128`) -- Fixed regression in :meth:`DataFrame.fillna` creating a copy when a dictionary was given as a fill value (:issue:`47188`) +- Fixed regression in :meth:`DataFrame.fillna`, :meth:`DataFrame.eval` and :meth:`DataFrame.query` creating a copy when updating inplace (:issue:`47188`) - Fixed regression in :meth:`DataFrame.nsmallest` led to wrong results when ``np.nan`` in the sorting column (:issue:`46589`) - Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`) - Fixed regression in :func:`concat` not sorting columns for mixed column names (:issue:`47127`) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index d82cc37b90ad4..3312eff61c440 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -384,7 +384,7 @@ def eval( try: with warnings.catch_warnings(record=True): # TODO: Filter the warnings we actually care about here. - target[assigner] = ret + target.loc[:, assigner] = ret except (TypeError, IndexError) as err: raise ValueError("Cannot assign expression output to target") from err diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 39a940169e1f3..8711d53353185 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8000,7 +8000,7 @@ def update( if mask.all(): continue - self[col] = expressions.where(mask, this, that) + self.loc[:, col] = expressions.where(mask, this, that) # ---------------------------------------------------------------------- # Data reshaping diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index 408113e9bc417..d3257ac09a0ab 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( DataFrame, @@ -146,3 +148,14 @@ def test_update_with_different_dtype(self): expected = DataFrame({"a": [1, 3], "b": [np.nan, 2], "c": ["foo", np.nan]}) tm.assert_frame_equal(df, expected) + + @td.skip_array_manager_invalid_test + def test_update_modify_view(self): + # GH#47188 + df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]}) + df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]}) + result_view = df2[:] + df2.update(df) + expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]}) + tm.assert_frame_equal(df2, expected) + tm.assert_frame_equal(result_view, expected) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 35335c54cd41e..ed73c1a8b283f 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -183,6 +183,16 @@ def test_eval_object_dtype_binop(self): expected = DataFrame({"a1": ["Y", "N"], "c": [True, False]}) tm.assert_frame_equal(res, expected) + @td.skip_array_manager_invalid_test + def test_eval_update_view_inplace(self): + # GH#47188 + df = DataFrame({"A": [1, 2], "B": [10, 9], "C": [11, 12]}) + result_view = df[:] + df.eval("A = B + C", inplace=True) + expected = DataFrame({"A": [21, 21], "B": [10, 9], "C": [11, 12]}) + tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(result_view, expected) + class TestDataFrameQueryWithMultiIndex: def test_query_with_named_multiindex(self, parser, engine): From c7273d013914d06a6d136ab09397ac7fd678e4b2 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 21 Jun 2022 13:33:48 +0200 Subject: [PATCH 9/9] Remove eval --- doc/source/whatsnew/v1.4.3.rst | 2 +- pandas/core/computation/eval.py | 2 +- pandas/tests/frame/test_query_eval.py | 10 ---------- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst index f1e1c798b28f6..d031426a2abbf 100644 --- a/doc/source/whatsnew/v1.4.3.rst +++ b/doc/source/whatsnew/v1.4.3.rst @@ -18,7 +18,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.to_csv` raising error when :class:`DataFrame` contains extension dtype categorical column (:issue:`46297`, :issue:`46812`) - Fixed regression in representation of ``dtypes`` attribute of :class:`MultiIndex` (:issue:`46900`) - Fixed regression when setting values with :meth:`DataFrame.loc` updating :class:`RangeIndex` when index was set as new column and column was updated afterwards (:issue:`47128`) -- Fixed regression in :meth:`DataFrame.fillna`, :meth:`DataFrame.eval` and :meth:`DataFrame.query` creating a copy when updating inplace (:issue:`47188`) +- Fixed regression in :meth:`DataFrame.fillna` and :meth:`DataFrame.update` creating a copy when updating inplace (:issue:`47188`) - Fixed regression in :meth:`DataFrame.nsmallest` led to wrong results when ``np.nan`` in the sorting column (:issue:`46589`) - Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`) - Fixed regression in :func:`concat` not sorting columns for mixed column names (:issue:`47127`) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 3312eff61c440..d82cc37b90ad4 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -384,7 +384,7 @@ def eval( try: with warnings.catch_warnings(record=True): # TODO: Filter the warnings we actually care about here. - target.loc[:, assigner] = ret + target[assigner] = ret except (TypeError, IndexError) as err: raise ValueError("Cannot assign expression output to target") from err diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index ed73c1a8b283f..35335c54cd41e 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -183,16 +183,6 @@ def test_eval_object_dtype_binop(self): expected = DataFrame({"a1": ["Y", "N"], "c": [True, False]}) tm.assert_frame_equal(res, expected) - @td.skip_array_manager_invalid_test - def test_eval_update_view_inplace(self): - # GH#47188 - df = DataFrame({"A": [1, 2], "B": [10, 9], "C": [11, 12]}) - result_view = df[:] - df.eval("A = B + C", inplace=True) - expected = DataFrame({"A": [21, 21], "B": [10, 9], "C": [11, 12]}) - tm.assert_frame_equal(df, expected) - tm.assert_frame_equal(result_view, expected) - class TestDataFrameQueryWithMultiIndex: def test_query_with_named_multiindex(self, parser, engine):