From f44dd5e2d282d6fe16f87f304c50f071573a47f8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 10 Oct 2020 13:58:49 -0500 Subject: [PATCH 1/4] Fixed metadata propagation in DataFrame.__getitem__ xref #28283 --- doc/source/whatsnew/v1.2.0.rst | 6 ++++++ pandas/core/generic.py | 2 +- pandas/tests/generic/test_finalize.py | 15 +++------------ 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 47ebd962b367c..fc7e403f0ab8c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -442,6 +442,12 @@ Other - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`) - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`) +- Fixed metadata propagation in the following methods (:issue:`28283`): + + - ``DataFrame.__getitem__`` + - :meth:`DataFrame.pop` + - :meth:`DataFrame.get` + - Fixed metadata propagation in the :class:`Series.dt` accessor (:issue:`28283`) - Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`) - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 04e1fc91c5fd4..9c77be160d887 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3720,7 +3720,7 @@ def _get_item_cache(self, item): loc = self.columns.get_loc(item) values = self._mgr.iget(loc) - res = self._box_col_values(values, loc) + res = self._box_col_values(values, loc).__finalize__(self) cache[item] = res res._set_as_cached(item, self) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 6692102bc9008..89752a9da1486 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -85,10 +85,7 @@ marks=pytest.mark.xfail(reason="Implement binary finalize"), ), (pd.DataFrame, frame_data, operator.methodcaller("transpose")), - pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", "A")), - marks=not_implemented_mark, - ), + (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", "A")), (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", ["A"])), (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", np.array([True]))), (pd.DataFrame, ({("A", "a"): [1]},), operator.methodcaller("__getitem__", ["A"])), @@ -308,10 +305,7 @@ ), (pd.DataFrame, frame_data, operator.methodcaller("swapaxes", 0, 1)), (pd.DataFrame, frame_mi_data, operator.methodcaller("droplevel", "A")), - pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("pop", "A")), - marks=not_implemented_mark, - ), + (pd.DataFrame, frame_data, operator.methodcaller("pop", "A")), pytest.param( (pd.DataFrame, frame_data, operator.methodcaller("squeeze")), marks=not_implemented_mark, @@ -336,10 +330,7 @@ (pd.DataFrame, frame_data, operator.methodcaller("take", [0, 0])), (pd.DataFrame, frame_mi_data, operator.methodcaller("xs", "a")), (pd.Series, (1, mi), operator.methodcaller("xs", "a")), - pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("get", "A")), - marks=not_implemented_mark, - ), + (pd.DataFrame, frame_data, operator.methodcaller("get", "A")), ( pd.DataFrame, frame_data, From 0bd52bbf12f97fa1bf097fb99276b87d22ba19c6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 13 Nov 2020 10:22:09 -0600 Subject: [PATCH 2/4] fixup --- pandas/tests/generic/test_finalize.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 6531ff27d246e..7930758bafb90 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -90,10 +90,7 @@ (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", np.array([True]))), (pd.DataFrame, ({("A", "a"): [1]},), operator.methodcaller("__getitem__", ["A"])), (pd.DataFrame, frame_data, operator.methodcaller("query", "A == 1")), - pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("eval", "A + 1")), - marks=not_implemented_mark, - ), + (pd.DataFrame, frame_data, operator.methodcaller("eval", "A + 1")), (pd.DataFrame, frame_data, operator.methodcaller("select_dtypes", include="int")), (pd.DataFrame, frame_data, operator.methodcaller("assign", b=1)), (pd.DataFrame, frame_data, operator.methodcaller("set_axis", ["A"])), From 1d8217a95f501d17fd66d1dd515abd83d1d8809b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 13 Nov 2020 11:04:23 -0600 Subject: [PATCH 3/4] xpass --- pandas/tests/generic/test_duplicate_labels.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py index 42745d2a69375..3f7bebd86e983 100644 --- a/pandas/tests/generic/test_duplicate_labels.py +++ b/pandas/tests/generic/test_duplicate_labels.py @@ -312,9 +312,7 @@ def test_series_raises(self, func): pytest.param( operator.itemgetter(("a", ["A", "A"])), "loc", marks=not_implemented ), - pytest.param( - operator.itemgetter((["a", "a"], "A")), "loc", marks=not_implemented - ), + (operator.itemgetter((["a", "a"], "A")), "loc"), # iloc (operator.itemgetter([0, 0]), "iloc"), pytest.param( From 71e5aec39ec85b1d1b6b13c12edc7ad8706e1d0f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 14 Nov 2020 14:10:24 -0600 Subject: [PATCH 4/4] xfail numexpr --- pandas/tests/generic/test_finalize.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 7930758bafb90..ecd70bb415334 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -90,7 +90,7 @@ (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", np.array([True]))), (pd.DataFrame, ({("A", "a"): [1]},), operator.methodcaller("__getitem__", ["A"])), (pd.DataFrame, frame_data, operator.methodcaller("query", "A == 1")), - (pd.DataFrame, frame_data, operator.methodcaller("eval", "A + 1")), + (pd.DataFrame, frame_data, operator.methodcaller("eval", "A + 1", engine="python")), (pd.DataFrame, frame_data, operator.methodcaller("select_dtypes", include="int")), (pd.DataFrame, frame_data, operator.methodcaller("assign", b=1)), (pd.DataFrame, frame_data, operator.methodcaller("set_axis", ["A"])), @@ -520,6 +520,15 @@ def test_finalize_called(ndframe_method): assert result.attrs == {"a": 1} +@not_implemented_mark +def test_finalize_called_eval_numexpr(): + pytest.importorskip("numexpr") + df = pd.DataFrame({"A": [1, 2]}) + df.attrs["A"] = 1 + result = df.eval("A + 1", engine="numexpr") + assert result.attrs == {"A": 1} + + # ---------------------------------------------------------------------------- # Binary operations