Skip to content

REF/ENH: Refactor NDFrame finalization #28334

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
from pandas.core.indexes.period import Period, PeriodIndex
import pandas.core.indexing as indexing
from pandas.core.internals import BlockManager
from pandas.core.meta import PandasMetadata
from pandas.core.ops import _align_method_FRAME

from pandas.io.formats import format as fmt
Expand Down Expand Up @@ -5163,7 +5164,7 @@ def pipe(self, func, *args, **kwargs):
# ----------------------------------------------------------------------
# Attribute access

def __finalize__(self, other, method=None, **kwargs):
def __finalize__(self, other, method=None):
"""
Propagate metadata from other to self.

Expand All @@ -5175,9 +5176,10 @@ def __finalize__(self, other, method=None, **kwargs):
types of propagation actions based on this

"""
if isinstance(other, NDFrame):
for name in self._metadata:
object.__setattr__(self, name, getattr(other, name, None))
for name in self._metadata:
finalizer = PandasMetadata(name)
finalizer.finalize(self, other, method)

return self

def __getattr__(self, name):
Expand Down
130 changes: 130 additions & 0 deletions pandas/core/meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
"""
Metadata propagation through pandas operations.

This module contains the infrastructure for propagating ``NDFrame._metadata``
through operations. We perform an operation (say :meth:`pandas.Series.copy`) that
returns an ``NDFrame`` and would like to propagate the metadata (say ``Series.name``)
from ``self`` to the new ``NDFrame``.
"""
from typing import TYPE_CHECKING, Any, Dict

from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries

if TYPE_CHECKING:
from pandas._typing import FrameOrSeries


class PandasMetadataType(type):
    """
    Metaclass that hands out a single finalizer instance per metadata name.

    Calling any class built with this metaclass first consults a registry
    keyed by ``name``. The registry lives on the metaclass itself, so it is
    shared by every finalizer class: an existing entry is returned as-is
    (regardless of which class is being called), otherwise a new instance
    is constructed, stored and returned.
    """

    # TODO(Py35): Replace metaclass with __init_subclass__

    _instances = {}  # type: Dict[str, "PandasMetadata"]

    def __init__(cls, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __call__(cls, name, *args: Any, **kwds: Any) -> Any:  # type: ignore
        # Fast path: a finalizer was already registered under this name.
        try:
            return cls._instances[name]
        except KeyError:
            pass

        # First request for this name: build the instance and remember it.
        created = super().__call__(name, *args, **kwds)
        cls._instances[name] = created
        return created


class PandasMetadata(metaclass=PandasMetadataType):
    """
    Dispatch metadata finalization for pandas metadata.

    Instances are cached per ``name`` by the metaclass, so constructing a
    finalizer for a name that already has one returns the existing instance
    (whatever its class) instead of creating a new object.

    Parameters
    ----------
    name : str
        The name of the attribute being finalized.

    Examples
    --------
    If you want the default resolution (copy from a source NDFrame
    to a new NDFrame), you can just create an instance

    >>> mymeta = PandasMetadata("mymeta")

    If you need custom metadata resolution, you'll need to subclass.

    >>> class IncrementMetadata(PandasMetadata):
    ...     def default(self, new, other):
    ...         setattr(new, self.name, getattr(other, self.name, -1) + 1)

    >>> increment_metadata = IncrementMetadata("attr")
    """

    def __init__(self, name: str):
        self.name = name

    def __repr__(self):
        # Report the concrete class so subclasses are distinguishable.
        return "{}(name='{}')".format(type(self).__name__, self.name)

    def finalize(self, new: "FrameOrSeries", other: Any, method) -> None:
        """
        Run the finalization for `method`.

        Parameters
        ----------
        new : DataFrame or Series
        other : Any
            One of the following types

            * DataFrame
            * Series
            * Concatenator
            * MergeOperation

        method : str
            The source method.

        Returns
        -------
        None
            Expected to operate inplace.

        Notes
        -----
        The default implementation simply calls ``.default``, ignoring `method`.
        Subclasses override this to dispatch on `method`.
        """
        self.default(new, other)

    def default(self, new: "FrameOrSeries", other: Any) -> None:
        """
        The default finalizer when this method, attribute hasn't been overridden.

        This copies the attribute called ``self.name`` from ``other`` to
        ``new``, modifying ``new`` inplace. If ``other`` lacks the attribute,
        ``None`` is stored. Nothing happens when ``other`` is not a Series or
        DataFrame.

        Parameters
        ----------
        new : NDFrame
            The newly created NDFrame being finalized.
        other : Any
            The source object attributes will be extracted from.
        """
        # TODO: check perf on this isinstance.
        if isinstance(other, (ABCSeries, ABCDataFrame)):
            # object.__setattr__ bypasses any __setattr__ defined on ``new``.
            object.__setattr__(new, self.name, getattr(other, self.name, None))


class NameMetadata(PandasMetadata):
    """
    Finalization for Series.name.

    Nothing is overridden here, so the behaviour inherited from
    ``PandasMetadata`` applies unchanged.
    """


# TODO: having to create this here feels weird.
# Instantiating stores the finalizer under "name" in the per-name instance
# cache kept by the metaclass, so later lookups by that name return it.
name_metadata = NameMetadata("name")

# For backwards compat. Do we care about this?
# We can pretty easily deprecate, require subclasses to make their
# own instance.
default_finalizer = PandasMetadata("pandas")
1 change: 1 addition & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3999,6 +3999,7 @@ def f(x):
if len(mapped) and isinstance(mapped[0], Series):
# GH 25959 use pd.array instead of tolist
# so extension arrays can be used
# TODO: would like to apply finalize here.
return self._constructor_expanddim(pd.array(mapped), index=self.index)
else:
return self._constructor(mapped, index=self.index).__finalize__(self)
Expand Down
58 changes: 58 additions & 0 deletions pandas/tests/generic/test_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import pytest

import pandas as pd
from pandas.core.meta import PandasMetadata


class MyMeta(PandasMetadata):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have an implementation for index_allows_duplicates? I'd be more comfortable if there were a more fully fleshed-out example/test.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I thought I did but am having trouble finding it right now.

def finalize(self, new, other, method):
    """
    Route finalization to a method-specific handler.

    Parameters
    ----------
    new : DataFrame or Series
        The object being finalized; modified inplace.
    other : Any
        The source object handed to the handler.
    method : str or None
        The source method. ``"concat"`` is handled by ``finalize_concat``;
        every other value, including ``"copy"``, is passed on to the parent
        class's ``finalize``.
    """
    # "copy" used to be dispatched to ``finalize_copy``, which is not
    # defined on this class, so that branch could only raise
    # AttributeError. Copies now take the parent's path instead.
    if method == "concat":
        self.finalize_concat(new, other)
    else:
        super().finalize(new, other, method)

def default(self, new, other):
    """Set ``new.attr`` to one more than ``other.attr``."""
    bumped = other.attr + 1
    new.attr = bumped

def finalize_concat(self, new, other):
    """
    Set ``new.attr`` to the sum of ``attr`` over ``other.objs``.

    ``other`` must be a ``_Concatenator`` instance.
    """
    concatenator_type = pd.core.reshape.concat._Concatenator
    assert isinstance(other, concatenator_type)

    total = 0
    for obj in other.objs:
        total += obj.attr
    new.attr = total


# Creating the instance is what registers this finalizer: the metaclass
# caches one instance per name, so a later ``PandasMetadata("attr")``
# lookup returns this object.
mymeta = MyMeta("attr")


@pytest.fixture
def custom_meta(monkeypatch):
    """
    Temporarily add ``"attr"`` to ``Series._metadata`` and ``DataFrame._metadata``.

    Each class gets a fresh list, so the original ``_metadata`` objects are
    never mutated. ``monkeypatch`` restores the original attributes when the
    test finishes, so no manual bookkeeping of the old values is needed.
    """
    for cls in [pd.Series, pd.DataFrame]:
        # Build a new list rather than appending to the class attribute.
        custom_metadata = cls._metadata + ["attr"]
        monkeypatch.setattr(cls, "_metadata", custom_metadata)


@pytest.mark.usefixtures("custom_meta")
def test_custom_finalizer():
    """A copy of a frame with ``attr == 0`` should come back with ``attr == 1``."""
    frame = pd.DataFrame({"A": [1, 2]})
    frame.attr = 0

    assert frame.copy().attr == 1


@pytest.mark.usefixtures("custom_meta")
def test_concat():
    """Concatenating frames with ``attr`` 2 and 3 should give ``attr == 5``."""
    left = pd.DataFrame({"A": [1, 2]})
    left.attr = 2

    right = pd.DataFrame({"A": [1, 2]})
    right.attr = 3

    combined = pd.concat([left, right])
    assert combined.attr == 5