diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 831543ee66039..3eabeafe296ae 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -76,6 +76,7 @@ from pandas.core.indexes.period import Period, PeriodIndex import pandas.core.indexing as indexing from pandas.core.internals import BlockManager +from pandas.core.meta import PandasMetadata from pandas.core.ops import _align_method_FRAME from pandas.io.formats import format as fmt @@ -5163,7 +5164,7 @@ def pipe(self, func, *args, **kwargs): # ---------------------------------------------------------------------- # Attribute access - def __finalize__(self, other, method=None, **kwargs): + def __finalize__(self, other, method=None): """ Propagate metadata from other to self. @@ -5175,9 +5176,10 @@ def __finalize__(self, other, method=None, **kwargs): types of propagation actions based on this """ - if isinstance(other, NDFrame): - for name in self._metadata: - object.__setattr__(self, name, getattr(other, name, None)) + for name in self._metadata: + finalizer = PandasMetadata(name) + finalizer.finalize(self, other, method) + return self def __getattr__(self, name): diff --git a/pandas/core/meta.py b/pandas/core/meta.py new file mode 100644 index 0000000000000..bf2617b446a39 --- /dev/null +++ b/pandas/core/meta.py @@ -0,0 +1,130 @@ +""" +Metadata propagation through pandas operations. + +This module contains the infrastructure for propagating ``NDFrame._metadata`` +through operations. We perform an operation (say :meth:`pandas.Series.copy`) that +returns an ``NDFrame`` and would like to propagate the metadata (say ``Series.name``) +from ``self`` to the new ``NDFrame``. +""" +from typing import TYPE_CHECKING, Any, Dict + +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries + +if TYPE_CHECKING: + from pandas._typing import FrameOrSeries + + +class PandasMetadataType(type): + """ + Metaclass controlling creation of metadata finalizers. 
+ + This ensures we have one finalizer instance per name, and + provides a place to look up finalizer per name. + """ + + # TODO(Py35): Replace metaclass with __init_subclass__ + + _instances = {} # type: Dict[str, "PandasMetadata"] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, name, *args: Any, **kwds: Any) -> Any: # type: ignore + if name in self._instances: + return self._instances[name] + else: + new = super().__call__(name, *args, **kwds) + self._instances[name] = new + return new + + +class PandasMetadata(metaclass=PandasMetadataType): + """ + Dispatch metadata finalization for pandas metadata. + + Parameters + ---------- + name : str + The name of the attribute being finalized. + + Examples + -------- + If you want the default resolution (copy from a source NDFrame + to a new NDFrame), you can just create an instance + + >>> mymeta = PandasMetadata("mymeta") + + If you need custom metadata resolution, you'll need to subclass. + + >>> class IncrementMetadata(PandasMetadata): + ... def default(self, new, other): + ... setattr(new, self.name, getattr(other, self.name, -1) + 1) + + >>> increment_metadata = IncrementMetadata("attr") + """ + + def __init__(self, name: str): + self.name = name + + def __repr__(self): + return "PandasMetadata(name='{}')".format(self.name) + + def finalize(self, new: "FrameOrSeries", other: Any, method): + """ + Run the finalization for `method`. + + Parameters + ---------- + new : DataFrame or Series + other : Any + One of the following types + + * DataFrame + * Series + * Concatenator + * MergeOperation + + method : str + The source method. + + Returns + ------- + None + Expected to operate inplace. + + Notes + ----- + The default implementation simply calls ``.default``, ignoring `method`. + """ + self.default(new, other) + + def default(self, new: "FrameOrSeries", other: Any): + """ + The default finalizer when this method, attribute hasn't been overridden. 
+ + This copies the ``_metadata`` attribute from ``other`` to ``new``, modifying + ``new`` inplace. + + Parameters + ---------- + new : NDFrame + The newly created NDFrame being finalized. + other : Any + The source object attributes will be extracted from. + """ + # TODO: check perf on this isinstance. + if isinstance(other, (ABCSeries, ABCDataFrame)): + object.__setattr__(new, self.name, getattr(other, self.name, None)) + + +class NameMetadata(PandasMetadata): + """Finalization for Series.name""" + + +# TODO: having to create this here feels weird. +name_metadata = NameMetadata("name") + +# For backwards compat. Do we care about this? +# We can pretty easily deprecate, require subclasses to make their +# own instance. +default_finalizer = PandasMetadata("pandas") diff --git a/pandas/core/series.py b/pandas/core/series.py index 10d50e89ca92e..bcfd1884172e2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3999,6 +3999,7 @@ def f(x): if len(mapped) and isinstance(mapped[0], Series): # GH 25959 use pd.array instead of tolist # so extension arrays can be used + # TODO: would like to apply finalize here. 
return self._constructor_expanddim(pd.array(mapped), index=self.index) else: return self._constructor(mapped, index=self.index).__finalize__(self) diff --git a/pandas/tests/generic/test_metadata.py b/pandas/tests/generic/test_metadata.py new file mode 100644 index 0000000000000..0a7ca78f11f99 --- /dev/null +++ b/pandas/tests/generic/test_metadata.py @@ -0,0 +1,58 @@ +import pytest + +import pandas as pd +from pandas.core.meta import PandasMetadata + + +class MyMeta(PandasMetadata): + def finalize(self, new, other, method): + if method == "concat": + self.finalize_concat(new, other) + elif method == "copy": + self.finalize_copy(new, other) + else: + super().finalize(new, other, method) + + def default(self, new, other): + new.attr = other.attr + 1 + + def finalize_concat(self, new, other): + assert isinstance(other, pd.core.reshape.concat._Concatenator) + new.attr = sum(x.attr for x in other.objs) + + +mymeta = MyMeta("attr") + + +@pytest.fixture +def custom_meta(monkeypatch): + original_metadata = [] + + for cls in [pd.Series, pd.DataFrame]: + original_metadata.append(cls._metadata) + custom_metadata = cls._metadata.copy() + custom_metadata.append("attr") + + monkeypatch.setattr(cls, "_metadata", custom_metadata) + + +@pytest.mark.usefixtures("custom_meta") +def test_custom_finalizer(): + + df = pd.DataFrame({"A": [1, 2]}) + df.attr = 0 + + result = df.copy() + assert result.attr == 1 + + +@pytest.mark.usefixtures("custom_meta") +def test_concat(): + df1 = pd.DataFrame({"A": [1, 2]}) + df1.attr = 2 + + df2 = pd.DataFrame({"A": [1, 2]}) + df2.attr = 3 + + result = pd.concat([df1, df2]) + assert result.attr == 5