Skip to content

Commit ab89aef

Browse files
authored
BUG: PeriodDtype and IntervalDtype holding references to itself when object is deleted (#54184)
* BUG: PeriodDtype and IntervalDtype holding references to itself when object is deleted
* Add issue number
* Address tests
* Typing
* typing:
1 parent 0fff817 commit ab89aef

File tree

4 files changed

+58
-48
lines changed

4 files changed

+58
-48
lines changed

doc/source/whatsnew/v2.1.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,7 @@ Strings
495495
Interval
496496
^^^^^^^^
497497
- :meth:`pd.IntervalIndex.get_indexer` and :meth:`pd.IntervalIndex.get_indexer_nonunique` raising if ``target`` is read-only array (:issue:`53703`)
498-
-
498+
- Bug in :class:`IntervalDtype` where the object could be kept alive by the class-level dtype cache after being deleted (:issue:`54184`)
499499

500500
Indexing
501501
^^^^^^^^
@@ -539,6 +539,7 @@ Period
539539
- Bug in :class:`PeriodDtype` constructor failing to raise ``TypeError`` when no argument is passed or when ``None`` is passed (:issue:`27388`)
540540
- Bug in :class:`PeriodDtype` constructor incorrectly returning the same ``normalize`` for different :class:`DateOffset` ``freq`` inputs (:issue:`24121`)
541541
- Bug in :class:`PeriodDtype` constructor raising ``ValueError`` instead of ``TypeError`` when an invalid type is passed (:issue:`51790`)
542+
- Bug in :class:`PeriodDtype` where the object could be kept alive by the class-level dtype cache after being deleted (:issue:`54184`)
542543
- Bug in :func:`read_csv` not processing empty strings as a null value, with ``engine="pyarrow"`` (:issue:`52087`)
543544
- Bug in :func:`read_csv` returning ``object`` dtype columns instead of ``float64`` dtype columns for columns that are all null with ``engine="pyarrow"`` (:issue:`52087`)
544545
- Bug in :meth:`Period.now` not accepting the ``freq`` parameter as a keyword argument (:issue:`53369`)

pandas/core/dtypes/dtypes.py

+26-33
Original file line numberDiff line numberDiff line change
@@ -950,7 +950,7 @@ class PeriodDtype(PeriodDtypeBase, PandasExtensionDtype):
950950
# error: Incompatible types in assignment (expression has type
951951
# "Dict[int, PandasExtensionDtype]", base class "PandasExtensionDtype"
952952
# defined the type as "Dict[str, PandasExtensionDtype]") [assignment]
953-
_cache_dtypes: dict[BaseOffset, PeriodDtype] = {} # type: ignore[assignment] # noqa: E501
953+
_cache_dtypes: dict[BaseOffset, int] = {} # type: ignore[assignment]
954954
__hash__ = PeriodDtypeBase.__hash__
955955
_freq: BaseOffset
956956

@@ -967,13 +967,13 @@ def __new__(cls, freq):
967967
freq = cls._parse_dtype_strict(freq)
968968

969969
try:
970-
return cls._cache_dtypes[freq]
970+
dtype_code = cls._cache_dtypes[freq]
971971
except KeyError:
972972
dtype_code = freq._period_dtype_code
973-
u = PeriodDtypeBase.__new__(cls, dtype_code, freq.n)
974-
u._freq = freq
975-
cls._cache_dtypes[freq] = u
976-
return u
973+
cls._cache_dtypes[freq] = dtype_code
974+
u = PeriodDtypeBase.__new__(cls, dtype_code, freq.n)
975+
u._freq = freq
976+
return u
977977

978978
def __reduce__(self):
979979
return type(self), (self.name,)
@@ -1154,8 +1154,10 @@ class IntervalDtype(PandasExtensionDtype):
11541154
)
11551155

11561156
_cache_dtypes: dict[str_type, PandasExtensionDtype] = {}
1157+
_subtype: None | np.dtype
1158+
_closed: IntervalClosedType | None
11571159

1158-
def __new__(cls, subtype=None, closed: IntervalClosedType | None = None):
1160+
def __init__(self, subtype=None, closed: IntervalClosedType | None = None) -> None:
11591161
from pandas.core.dtypes.common import (
11601162
is_string_dtype,
11611163
pandas_dtype,
@@ -1170,19 +1172,19 @@ def __new__(cls, subtype=None, closed: IntervalClosedType | None = None):
11701172
"dtype.closed and 'closed' do not match. "
11711173
"Try IntervalDtype(dtype.subtype, closed) instead."
11721174
)
1173-
return subtype
1175+
self._subtype = subtype._subtype
1176+
self._closed = subtype._closed
11741177
elif subtype is None:
11751178
# we are called as an empty constructor
11761179
# generally for pickle compat
1177-
u = object.__new__(cls)
1178-
u._subtype = None
1179-
u._closed = closed
1180-
return u
1180+
self._subtype = None
1181+
self._closed = closed
11811182
elif isinstance(subtype, str) and subtype.lower() == "interval":
1182-
subtype = None
1183+
self._subtype = None
1184+
self._closed = closed
11831185
else:
11841186
if isinstance(subtype, str):
1185-
m = cls._match.search(subtype)
1187+
m = IntervalDtype._match.search(subtype)
11861188
if m is not None:
11871189
gd = m.groupdict()
11881190
subtype = gd["subtype"]
@@ -1199,24 +1201,15 @@ def __new__(cls, subtype=None, closed: IntervalClosedType | None = None):
11991201
subtype = pandas_dtype(subtype)
12001202
except TypeError as err:
12011203
raise TypeError("could not construct IntervalDtype") from err
1202-
1203-
if CategoricalDtype.is_dtype(subtype) or is_string_dtype(subtype):
1204-
# GH 19016
1205-
msg = (
1206-
"category, object, and string subtypes are not supported "
1207-
"for IntervalDtype"
1208-
)
1209-
raise TypeError(msg)
1210-
1211-
key = f"{subtype}{closed}"
1212-
try:
1213-
return cls._cache_dtypes[key]
1214-
except KeyError:
1215-
u = object.__new__(cls)
1216-
u._subtype = subtype
1217-
u._closed = closed
1218-
cls._cache_dtypes[key] = u
1219-
return u
1204+
if CategoricalDtype.is_dtype(subtype) or is_string_dtype(subtype):
1205+
# GH 19016
1206+
msg = (
1207+
"category, object, and string subtypes are not supported "
1208+
"for IntervalDtype"
1209+
)
1210+
raise TypeError(msg)
1211+
self._subtype = subtype
1212+
self._closed = closed
12201213

12211214
@cache_readonly
12221215
def _can_hold_na(self) -> bool:
@@ -1232,7 +1225,7 @@ def _can_hold_na(self) -> bool:
12321225

12331226
@property
12341227
def closed(self) -> IntervalClosedType:
1235-
return self._closed
1228+
return self._closed # type: ignore[return-value]
12361229

12371230
@property
12381231
def subtype(self):

pandas/tests/dtypes/test_common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def test_categorical_dtype(self):
100100
],
101101
)
102102
def test_period_dtype(self, dtype):
103-
assert com.pandas_dtype(dtype) is PeriodDtype(dtype)
103+
assert com.pandas_dtype(dtype) is not PeriodDtype(dtype)
104104
assert com.pandas_dtype(dtype) == PeriodDtype(dtype)
105105
assert com.pandas_dtype(dtype) == dtype
106106

pandas/tests/dtypes/test_dtypes.py

+29-13
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import re
2+
import weakref
23

34
import numpy as np
45
import pytest
@@ -412,9 +413,9 @@ def test_hash_vs_equality(self, dtype):
412413
assert dtype == dtype2
413414
assert dtype2 == dtype
414415
assert dtype3 == dtype
415-
assert dtype is dtype2
416-
assert dtype2 is dtype
417-
assert dtype3 is dtype
416+
assert dtype is not dtype2
417+
assert dtype2 is not dtype
418+
assert dtype3 is not dtype
418419
assert hash(dtype) == hash(dtype2)
419420
assert hash(dtype) == hash(dtype3)
420421

@@ -458,13 +459,13 @@ def test_subclass(self):
458459

459460
def test_identity(self):
460461
assert PeriodDtype("period[D]") == PeriodDtype("period[D]")
461-
assert PeriodDtype("period[D]") is PeriodDtype("period[D]")
462+
assert PeriodDtype("period[D]") is not PeriodDtype("period[D]")
462463

463464
assert PeriodDtype("period[3D]") == PeriodDtype("period[3D]")
464-
assert PeriodDtype("period[3D]") is PeriodDtype("period[3D]")
465+
assert PeriodDtype("period[3D]") is not PeriodDtype("period[3D]")
465466

466467
assert PeriodDtype("period[1S1U]") == PeriodDtype("period[1000001U]")
467-
assert PeriodDtype("period[1S1U]") is PeriodDtype("period[1000001U]")
468+
assert PeriodDtype("period[1S1U]") is not PeriodDtype("period[1000001U]")
468469

469470
def test_compat(self, dtype):
470471
assert not is_datetime64_ns_dtype(dtype)
@@ -565,6 +566,13 @@ def test_perioddtype_caching_dateoffset_normalize(self):
565566
per_d2 = PeriodDtype(pd.offsets.YearEnd(normalize=False))
566567
assert not per_d2.freq.normalize
567568

569+
def test_dont_keep_ref_after_del(self):
570+
# GH 54184
571+
dtype = PeriodDtype("D")
572+
ref = weakref.ref(dtype)
573+
del dtype
574+
assert ref() is None
575+
568576

569577
class TestIntervalDtype(Base):
570578
@pytest.fixture
@@ -581,9 +589,9 @@ def test_hash_vs_equality(self, dtype):
581589
assert dtype == dtype2
582590
assert dtype2 == dtype
583591
assert dtype3 == dtype
584-
assert dtype is dtype2
585-
assert dtype2 is dtype3
586-
assert dtype3 is dtype
592+
assert dtype is not dtype2
593+
assert dtype2 is not dtype3
594+
assert dtype3 is not dtype
587595
assert hash(dtype) == hash(dtype2)
588596
assert hash(dtype) == hash(dtype3)
589597

@@ -593,9 +601,9 @@ def test_hash_vs_equality(self, dtype):
593601
assert dtype2 == dtype1
594602
assert dtype2 == dtype2
595603
assert dtype2 == dtype3
596-
assert dtype2 is dtype1
604+
assert dtype2 is not dtype1
597605
assert dtype2 is dtype2
598-
assert dtype2 is dtype3
606+
assert dtype2 is not dtype3
599607
assert hash(dtype2) == hash(dtype1)
600608
assert hash(dtype2) == hash(dtype2)
601609
assert hash(dtype2) == hash(dtype3)
@@ -833,12 +841,13 @@ def test_basic_dtype(self):
833841
assert not is_interval_dtype(np.float64)
834842

835843
def test_caching(self):
844+
# GH 54184: Caching not shown to improve performance
836845
IntervalDtype.reset_cache()
837846
dtype = IntervalDtype("int64", "right")
838-
assert len(IntervalDtype._cache_dtypes) == 1
847+
assert len(IntervalDtype._cache_dtypes) == 0
839848

840849
IntervalDtype("interval")
841-
assert len(IntervalDtype._cache_dtypes) == 2
850+
assert len(IntervalDtype._cache_dtypes) == 0
842851

843852
IntervalDtype.reset_cache()
844853
tm.round_trip_pickle(dtype)
@@ -856,6 +865,13 @@ def test_unpickling_without_closed(self):
856865

857866
tm.round_trip_pickle(dtype)
858867

868+
def test_dont_keep_ref_after_del(self):
869+
# GH 54184
870+
dtype = IntervalDtype("int64", "right")
871+
ref = weakref.ref(dtype)
872+
del dtype
873+
assert ref() is None
874+
859875

860876
class TestCategoricalDtypeParametrized:
861877
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)