Skip to content

PERF: Eliminate circular references in accessor attributes #58733

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 0 additions & 191 deletions LICENSES/XARRAY_LICENSE

This file was deleted.

1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ Removal of prior version deprecations/changes

Performance improvements
~~~~~~~~~~~~~~~~~~~~~~~~
- Eliminated circular reference to the original pandas object in accessor attributes (e.g. :attr:`Series.str`). However, accessor instantiation is no longer cached (:issue:`47667`, :issue:`41357`)
- :attr:`Categorical.categories` returns a :class:`RangeIndex` instead of an :class:`Index` if the constructed ``values`` was a ``range``. (:issue:`57787`)
- :class:`DataFrame` returns a :class:`RangeIndex` columns when possible when ``data`` is a ``dict`` (:issue:`57943`)
- :class:`Series` returns a :class:`RangeIndex` index when possible when ``data`` is a ``dict`` (:issue:`58118`)
Expand Down
20 changes: 4 additions & 16 deletions pandas/core/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,17 +195,11 @@ def add_delegate_accessors(cls):
return add_delegate_accessors


# Ported with modifications from xarray; licence at LICENSES/XARRAY_LICENSE
# https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py
# 1. We don't need to catch and re-raise AttributeErrors as RuntimeErrors
# 2. We use a UserWarning instead of a custom Warning


class CachedAccessor:
class Accessor:
"""
Custom property-like object.
A descriptor for caching accessors.
A descriptor for accessors.
Parameters
----------
Expand All @@ -229,13 +223,7 @@ def __get__(self, obj, cls):
if obj is None:
# we're accessing the attribute of the class, i.e., Dataset.geo
return self._accessor
accessor_obj = self._accessor(obj)
# Replace the property with the accessor object. Inspired by:
# https://www.pydanny.com/cached-property.html
# We need to use object.__setattr__ because we overwrite __setattr__ on
# NDFrame
object.__setattr__(obj, self._name, accessor_obj)
return accessor_obj
return self._accessor(obj)


@doc(klass="", examples="", others="")
Expand Down Expand Up @@ -295,7 +283,7 @@ def decorator(accessor: TypeT) -> TypeT:
UserWarning,
stacklevel=find_stack_level(),
)
setattr(cls, name, CachedAccessor(name, accessor))
setattr(cls, name, Accessor(name, accessor))
cls._accessors.add(name)
return accessor

Expand Down
6 changes: 3 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@
ops,
roperator,
)
from pandas.core.accessor import CachedAccessor
from pandas.core.accessor import Accessor
from pandas.core.apply import reconstruct_and_relabel_result
from pandas.core.array_algos.take import take_2d_multi
from pandas.core.arraylike import OpsMixin
Expand Down Expand Up @@ -13487,10 +13487,10 @@ def isin_(x):

# ----------------------------------------------------------------------
# Add plotting methods to DataFrame
plot = CachedAccessor("plot", pandas.plotting.PlotAccessor)
plot = Accessor("plot", pandas.plotting.PlotAccessor)
hist = pandas.plotting.hist_frame
boxplot = pandas.plotting.boxplot_frame
sparse = CachedAccessor("sparse", SparseFrameAccessor)
sparse = Accessor("sparse", SparseFrameAccessor)

# ----------------------------------------------------------------------
# Internal Interface Methods
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@
nanops,
ops,
)
from pandas.core.accessor import CachedAccessor
from pandas.core.accessor import Accessor
import pandas.core.algorithms as algos
from pandas.core.array_algos.putmask import (
setitem_datetimelike_compat,
Expand Down Expand Up @@ -462,7 +462,7 @@ def _engine_type(

_accessors = {"str"}

str = CachedAccessor("str", StringMethods)
str = Accessor("str", StringMethods)

_references = None

Expand Down
16 changes: 8 additions & 8 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
ops,
roperator,
)
from pandas.core.accessor import CachedAccessor
from pandas.core.accessor import Accessor
from pandas.core.apply import SeriesApply
from pandas.core.arrays import ExtensionArray
from pandas.core.arrays.arrow import (
Expand Down Expand Up @@ -5754,13 +5754,13 @@ def to_period(
# ----------------------------------------------------------------------
# Accessor Methods
# ----------------------------------------------------------------------
str = CachedAccessor("str", StringMethods)
dt = CachedAccessor("dt", CombinedDatetimelikeProperties)
cat = CachedAccessor("cat", CategoricalAccessor)
plot = CachedAccessor("plot", pandas.plotting.PlotAccessor)
sparse = CachedAccessor("sparse", SparseAccessor)
struct = CachedAccessor("struct", StructAccessor)
list = CachedAccessor("list", ListAccessor)
str = Accessor("str", StringMethods)
dt = Accessor("dt", CombinedDatetimelikeProperties)
cat = Accessor("cat", CategoricalAccessor)
plot = Accessor("plot", pandas.plotting.PlotAccessor)
sparse = Accessor("sparse", SparseAccessor)
struct = Accessor("struct", StructAccessor)
list = Accessor("list", ListAccessor)

# ----------------------------------------------------------------------
# Add plotting methods to Series
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/strings/test_api.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import weakref

import numpy as np
import pytest

Expand Down Expand Up @@ -68,6 +70,15 @@ def test_api(any_string_dtype):
assert isinstance(Series([""], dtype=any_string_dtype).str, StringMethods)


def test_no_circular_reference(any_string_dtype):
    """Accessing ``.str`` must not keep the Series alive once it is deleted."""
    # GH 47667
    series = Series([""], dtype=any_string_dtype)
    series_ref = weakref.ref(series)
    # Touch the accessor as a bare expression: binding the result to a name
    # would itself hold a reference back to the Series.
    series.str  # Previously cached on the instance, creating a reference cycle
    del series
    # The weak reference resolves to None only if nothing else holds the Series.
    assert series_ref() is None


def test_api_mi_raises():
# GH 23679
mi = MultiIndex.from_arrays([["a", "b", "c"]])
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/test_register_accessor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from collections.abc import Generator
import contextlib
import weakref

import pytest

Expand Down Expand Up @@ -101,3 +102,22 @@ def __init__(self, data) -> None:

with pytest.raises(AttributeError, match="whoops"):
pd.Series([], dtype=object).bad


@pytest.mark.parametrize(
    "klass, registrar",
    [
        (pd.Series, pd.api.extensions.register_series_accessor),
        (pd.DataFrame, pd.api.extensions.register_dataframe_accessor),
        (pd.Index, pd.api.extensions.register_index_accessor),
    ],
)
def test_no_circular_reference(klass, registrar):
    """A registered accessor must not keep its parent object alive."""
    # GH 41357
    with ensure_removed(klass, "access"):
        registrar("access")(MyAccessor)
        instance = klass([0])
        instance_ref = weakref.ref(instance)
        # The accessor is created inline and discarded after this statement,
        # so no name is left pointing at it (or, through it, at the instance).
        assert instance.access.obj is instance
        del instance
        # The weak reference resolves to None only if the instance was released.
        assert instance_ref() is None