Skip to content

Commit bfa4eae

Browse files
authored
PERF: Eliminate circular references in accessor attributes (#58733)
1 parent 9cc9f31 commit bfa4eae

File tree

8 files changed

+49
-220
lines changed

8 files changed

+49
-220
lines changed

LICENSES/XARRAY_LICENSE

-191
This file was deleted.

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,7 @@ Removal of prior version deprecations/changes
335335

336336
Performance improvements
337337
~~~~~~~~~~~~~~~~~~~~~~~~
338+
- Eliminated the circular reference to the original pandas object in accessor attributes (e.g. :attr:`Series.str`). However, accessor instantiation is no longer cached (:issue:`47667`, :issue:`41357`)
338339
- :attr:`Categorical.categories` returns a :class:`RangeIndex` instead of an :class:`Index` if the constructed ``values`` was a ``range``. (:issue:`57787`)
339340
- :class:`DataFrame` returns :class:`RangeIndex` columns when possible when ``data`` is a ``dict`` (:issue:`57943`)
340341
- :class:`Series` returns a :class:`RangeIndex` index when possible when ``data`` is a ``dict`` (:issue:`58118`)

pandas/core/accessor.py

+4-16
Original file line numberDiff line numberDiff line change
@@ -195,17 +195,11 @@ def add_delegate_accessors(cls):
195195
return add_delegate_accessors
196196

197197

198-
# Ported with modifications from xarray; licence at LICENSES/XARRAY_LICENSE
199-
# https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py
200-
# 1. We don't need to catch and re-raise AttributeErrors as RuntimeErrors
201-
# 2. We use a UserWarning instead of a custom Warning
202-
203-
204-
class CachedAccessor:
198+
class Accessor:
205199
"""
206200
Custom property-like object.
207201
208-
A descriptor for caching accessors.
202+
A descriptor for accessors.
209203
210204
Parameters
211205
----------
@@ -229,13 +223,7 @@ def __get__(self, obj, cls):
229223
if obj is None:
230224
# we're accessing the attribute of the class, i.e., Dataset.geo
231225
return self._accessor
232-
accessor_obj = self._accessor(obj)
233-
# Replace the property with the accessor object. Inspired by:
234-
# https://www.pydanny.com/cached-property.html
235-
# We need to use object.__setattr__ because we overwrite __setattr__ on
236-
# NDFrame
237-
object.__setattr__(obj, self._name, accessor_obj)
238-
return accessor_obj
226+
return self._accessor(obj)
239227

240228

241229
@doc(klass="", examples="", others="")
@@ -295,7 +283,7 @@ def decorator(accessor: TypeT) -> TypeT:
295283
UserWarning,
296284
stacklevel=find_stack_level(),
297285
)
298-
setattr(cls, name, CachedAccessor(name, accessor))
286+
setattr(cls, name, Accessor(name, accessor))
299287
cls._accessors.add(name)
300288
return accessor
301289

pandas/core/frame.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@
125125
ops,
126126
roperator,
127127
)
128-
from pandas.core.accessor import CachedAccessor
128+
from pandas.core.accessor import Accessor
129129
from pandas.core.apply import reconstruct_and_relabel_result
130130
from pandas.core.array_algos.take import take_2d_multi
131131
from pandas.core.arraylike import OpsMixin
@@ -13487,10 +13487,10 @@ def isin_(x):
1348713487

1348813488
# ----------------------------------------------------------------------
1348913489
# Add plotting methods to DataFrame
13490-
plot = CachedAccessor("plot", pandas.plotting.PlotAccessor)
13490+
plot = Accessor("plot", pandas.plotting.PlotAccessor)
1349113491
hist = pandas.plotting.hist_frame
1349213492
boxplot = pandas.plotting.boxplot_frame
13493-
sparse = CachedAccessor("sparse", SparseFrameAccessor)
13493+
sparse = Accessor("sparse", SparseFrameAccessor)
1349413494

1349513495
# ----------------------------------------------------------------------
1349613496
# Internal Interface Methods

pandas/core/indexes/base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@
142142
nanops,
143143
ops,
144144
)
145-
from pandas.core.accessor import CachedAccessor
145+
from pandas.core.accessor import Accessor
146146
import pandas.core.algorithms as algos
147147
from pandas.core.array_algos.putmask import (
148148
setitem_datetimelike_compat,
@@ -462,7 +462,7 @@ def _engine_type(
462462

463463
_accessors = {"str"}
464464

465-
str = CachedAccessor("str", StringMethods)
465+
str = Accessor("str", StringMethods)
466466

467467
_references = None
468468

pandas/core/series.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@
101101
ops,
102102
roperator,
103103
)
104-
from pandas.core.accessor import CachedAccessor
104+
from pandas.core.accessor import Accessor
105105
from pandas.core.apply import SeriesApply
106106
from pandas.core.arrays import ExtensionArray
107107
from pandas.core.arrays.arrow import (
@@ -5750,13 +5750,13 @@ def to_period(
57505750
# ----------------------------------------------------------------------
57515751
# Accessor Methods
57525752
# ----------------------------------------------------------------------
5753-
str = CachedAccessor("str", StringMethods)
5754-
dt = CachedAccessor("dt", CombinedDatetimelikeProperties)
5755-
cat = CachedAccessor("cat", CategoricalAccessor)
5756-
plot = CachedAccessor("plot", pandas.plotting.PlotAccessor)
5757-
sparse = CachedAccessor("sparse", SparseAccessor)
5758-
struct = CachedAccessor("struct", StructAccessor)
5759-
list = CachedAccessor("list", ListAccessor)
5753+
str = Accessor("str", StringMethods)
5754+
dt = Accessor("dt", CombinedDatetimelikeProperties)
5755+
cat = Accessor("cat", CategoricalAccessor)
5756+
plot = Accessor("plot", pandas.plotting.PlotAccessor)
5757+
sparse = Accessor("sparse", SparseAccessor)
5758+
struct = Accessor("struct", StructAccessor)
5759+
list = Accessor("list", ListAccessor)
57605760

57615761
# ----------------------------------------------------------------------
57625762
# Add plotting methods to Series

pandas/tests/strings/test_api.py

+11
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import weakref
2+
13
import numpy as np
24
import pytest
35

@@ -68,6 +70,15 @@ def test_api(any_string_dtype):
6870
assert isinstance(Series([""], dtype=any_string_dtype).str, StringMethods)
6971

7072

73+
def test_no_circular_reference(any_string_dtype):
74+
# GH 47667
75+
ser = Series([""], dtype=any_string_dtype)
76+
ref = weakref.ref(ser)
77+
ser.str # Used to cache and cause circular reference
78+
del ser
79+
assert ref() is None
80+
81+
7182
def test_api_mi_raises():
7283
# GH 23679
7384
mi = MultiIndex.from_arrays([["a", "b", "c"]])

pandas/tests/test_register_accessor.py

+20
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from collections.abc import Generator
22
import contextlib
3+
import weakref
34

45
import pytest
56

@@ -101,3 +102,22 @@ def __init__(self, data) -> None:
101102

102103
with pytest.raises(AttributeError, match="whoops"):
103104
pd.Series([], dtype=object).bad
105+
106+
107+
@pytest.mark.parametrize(
108+
"klass, registrar",
109+
[
110+
(pd.Series, pd.api.extensions.register_series_accessor),
111+
(pd.DataFrame, pd.api.extensions.register_dataframe_accessor),
112+
(pd.Index, pd.api.extensions.register_index_accessor),
113+
],
114+
)
115+
def test_no_circular_reference(klass, registrar):
116+
# GH 41357
117+
with ensure_removed(klass, "access"):
118+
registrar("access")(MyAccessor)
119+
obj = klass([0])
120+
ref = weakref.ref(obj)
121+
assert obj.access.obj is obj
122+
del obj
123+
assert ref() is None

0 commit comments

Comments
 (0)