Skip to content

Implement maybe_cache for compat between immutable/mutable classes #19709

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -848,3 +848,4 @@ Other
^^^^^

- Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`)
- Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`)
72 changes: 70 additions & 2 deletions pandas/_libs/properties.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,30 @@ from cpython cimport (
cdef class cache_readonly(object):

cdef readonly:
object func, name, allow_setting
object func, name, allow_setting, __doc__

def __init__(self, func=None, allow_setting=False):
if func is not None:
self.func = func
self.name = func.__name__
self.__doc__ = func.__doc__
self.allow_setting = allow_setting

def __call__(self, func, doc=None):
self.func = func
self.name = func.__name__
if doc is not None:
self.__doc__ = doc
else:
self.__doc__ = func.__doc__
return self

def __get__(self, obj, typ):
# Get the cache or set a default one if needed
if obj is None:
# accessed on the class, not the instance
return self

# Get the cache or set a default one if needed
cache = getattr(obj, '_cache', None)
if cache is None:
try:
Expand Down Expand Up @@ -55,6 +63,66 @@ cdef class cache_readonly(object):

PyDict_SetItem(cache, self.name, value)


cdef class maybe_cache(object):
"""
A configurable property-like class that acts like a cache_readonly
if an "_immutable" flag is True and a like property otherwise
"""

cdef readonly:
object func, name, __doc__

def __init__(self, func=None):
if func is not None:
self.func = func
self.name = func.__name__
self.__doc__ = func.__doc__

def __call__(self, func, doc=None):
self.func = func
self.name = func.__name__
if doc is not None:
self.__doc__ = doc
else:
self.__doc__ = func.__doc__
return self

def __get__(self, obj, typ):
cdef:
bint immutable

if obj is None:
# accessed on the class, not the instance
return self

# Default to non-caching
immutable = getattr(typ, '_immutable', False)
if not immutable:
# behave like a property
val = self.func(obj)
return val

# Get the cache or set a default one if needed
cache = getattr(obj, '_cache', None)
if cache is None:
try:
cache = obj._cache = {}
except AttributeError:
return

if PyDict_Contains(cache, self.name):
# not necessary to Py_INCREF
val = <object> PyDict_GetItem(cache, self.name)
else:
val = self.func(obj)
PyDict_SetItem(cache, self.name, val)
return val

def __set__(self, obj, value):
raise Exception("cannot set values for [%s]" % self.name)


cdef class AxisProperty(object):
cdef:
Py_ssize_t axis
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
['asobject', 'sortlevel', 'reshape', 'get_value', 'set_value',
'from_csv', 'valid'])

hasnans = property(base.IndexOpsMixin.hasnans.func)

def __init__(self, data=None, index=None, dtype=None, name=None,
copy=False, fastpath=False):

Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/series/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from numpy import nan
import numpy as np

import pandas as pd
from pandas import Series
from pandas.core.indexes.datetimes import Timestamp
import pandas._libs.lib as lib
Expand Down Expand Up @@ -309,3 +310,16 @@ def test_convert_preserve_all_bool(self):
r = s._convert(datetime=True, numeric=True)
e = Series([False, True, False, False], dtype=bool)
tm.assert_series_equal(r, e)


def test_hasnans_unchached_for_series():
# GH#19700
idx = pd.Index([0, 1])
assert not idx.hasnans
assert 'hasnans' in idx._cache
ser = idx.to_series()
assert not ser.hasnans
assert not hasattr(ser, '_cache')
ser.iloc[-1] = np.nan
assert ser.hasnans
assert 'enables' in pd.Series.hasnans.__doc__
8 changes: 8 additions & 0 deletions pandas/tests/test_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
from pandas._libs import lib, writers as libwriters
import pandas.util.testing as tm
import pandas as pd


class TestMisc(object):
Expand Down Expand Up @@ -198,3 +199,10 @@ def test_get_reverse_indexer(self):
result = lib.get_reverse_indexer(indexer, 5)
expected = np.array([4, 2, 3, 6, 7], dtype=np.int64)
tm.assert_numpy_array_equal(result, expected)


def test_cache_readonly_preserve_docstrings():
# GH#19700 accessing class attributes that are cache_readonly
# should a) not return None and b) reflect original docstrings
assert "perf" in pd.Index.hasnans.__doc__
assert "dtype str " in pd.Index.dtype_str.__doc__